SunflowersLwtech
diff --git a/agents/ocr_agent.py b/agents/ocr_agent.py
Lines changed: 78 additions & 3 deletions
diff --git a/agents/orchestrator.py b/agents/orchestrator.py
Lines changed: 17 additions & 0 deletions
diff --git a/agents/vision_agent.py b/agents/vision_agent.py
Lines changed: 60 additions & 5 deletions
diff --git a/context/spatial_change_detector.py b/context/spatial_change_detector.py
Lines changed: 85 additions & 1 deletion
@@ -38,7 +38,8 @@
 Rules:
 1. Extract every piece of readable text — signs, menus, labels, documents, \
    screens, handwriting.
-2. Classify the text type: "menu", "sign", "document", "label", or "unknown".
+2. Classify the text type: "menu", "sign", "document", "label", \
+   "medicine_label", "receipt", "food_packaging", "business_card", or "unknown".
 3. For menus: parse into individual items with prices when visible. Format \
    each item as "Item Name - $Price" or just "Item Name" if no price. \
    Group items by category when clear (e.g. appetizers, mains, drinks, desserts).
@@ -47,8 +48,32 @@
 6. Report confidence based on text clarity (0.0 = unreadable, 1.0 = crystal clear).
 7. If no text is visible, return empty results with confidence 0.0.
 
+## Specialized Document Types
+
+For medicine labels / prescription bottles:
+- Extract: drug name, dosage/strength, frequency/directions, warnings/side effects, \
+  expiry date, manufacturer. Populate the medicine_info field.
+- Safety-critical: always flag drug interaction warnings and "do not exceed" limits.
+
+For receipts:
+- Extract: store/merchant name, individual items with prices, subtotal, tax, total, \
+  payment method, change given, date/time. Populate the receipt_info field.
+- Read items in order from top to bottom.
+
+For food packaging / nutrition labels:
+- Extract: product name, allergens (CRITICAL for safety — always extract), \
+  calories per serving, serving size, key nutrients, ingredients list. \
+  Populate the nutrition_info field.
+- Allergens are safety-critical — always extract and flag prominently.
+
+For business cards:
+- Extract: person's name, job title, company, phone number(s), email, \
+  website, physical address. Populate the contact_info field.
+- Preserve exact formatting of phone numbers and emails.
+
 Text priority (extract all, but rank by importance):
-1. Safety-critical: warnings, caution signs, traffic signals, hazard labels.
+1. Safety-critical: warnings, caution signs, traffic signals, hazard labels, \
+   allergens, drug warnings.
 2. Actionable: prices, opening hours, directions, instructions, dosage info.
 3. Informational: names, titles, descriptions, news headlines.
 4. Decorative: brand slogans, decorative quotes, background text.
@@ -90,7 +115,8 @@
         ),
         "text_type": types.Schema(
             type=types.Type.STRING,
-            enum=["menu", "sign", "document", "label", "unknown"],
+            enum=["menu", "sign", "document", "label", "medicine_label",
+                  "receipt", "food_packaging", "business_card", "unknown"],
             description="Classification of the dominant text type.",
         ),
         "items": types.Schema(
@@ -102,6 +128,55 @@
             type=types.Type.NUMBER,
             description="Confidence score from 0.0 to 1.0.",
         ),
+        "medicine_info": types.Schema(
+            type=types.Type.OBJECT,
+            nullable=True,
+            properties={
+                "drug_name": types.Schema(type=types.Type.STRING, description="Name of the medication."),
+                "dosage": types.Schema(type=types.Type.STRING, description="Dosage/strength, e.g. '200mg'."),
+                "frequency": types.Schema(type=types.Type.STRING, description="How often to take, e.g. 'every 6 hours'."),
+                "warnings": types.Schema(type=types.Type.ARRAY, items=types.Schema(type=types.Type.STRING), description="Warnings and side effects."),
+                "expiry_date": types.Schema(type=types.Type.STRING, nullable=True, description="Expiration date if visible."),
+            },
+            description="Structured medicine label information.",
+        ),
+        "receipt_info": types.Schema(
+            type=types.Type.OBJECT,
+            nullable=True,
+            properties={
+                "store_name": types.Schema(type=types.Type.STRING, description="Merchant/store name."),
+                "items": types.Schema(type=types.Type.ARRAY, items=types.Schema(type=types.Type.STRING), description="Line items with prices."),
+                "total": types.Schema(type=types.Type.STRING, description="Total amount."),
+                "payment_method": types.Schema(type=types.Type.STRING, nullable=True, description="Payment method if shown."),
+                "change": types.Schema(type=types.Type.STRING, nullable=True, description="Change given if shown."),
+            },
+            description="Structured receipt information.",
+        ),
+        "nutrition_info": types.Schema(
+            type=types.Type.OBJECT,
+            nullable=True,
+            properties={
+                "product_name": types.Schema(type=types.Type.STRING, description="Product name."),
+                "allergens": types.Schema(type=types.Type.ARRAY, items=types.Schema(type=types.Type.STRING), description="Allergen warnings."),
+                "calories": types.Schema(type=types.Type.STRING, description="Calories per serving."),
+                "serving_size": types.Schema(type=types.Type.STRING, description="Serving size."),
+                "ingredients": types.Schema(type=types.Type.STRING, nullable=True, description="Ingredients list."),
+            },
+            description="Structured nutrition/food packaging information.",
+        ),
+        "contact_info": types.Schema(
+            type=types.Type.OBJECT,
+            nullable=True,
+            properties={
+                "name": types.Schema(type=types.Type.STRING, description="Person's name."),
+                "title": types.Schema(type=types.Type.STRING, nullable=True, description="Job title."),
+                "company": types.Schema(type=types.Type.STRING, nullable=True, description="Company name."),
+                "phone": types.Schema(type=types.Type.STRING, nullable=True, description="Phone number(s)."),
+                "email": types.Schema(type=types.Type.STRING, nullable=True, description="Email address."),
+                "address": types.Schema(type=types.Type.STRING, nullable=True, description="Physical address."),
+            },
+            description="Structured business card / contact information.",
+        ),
     },
     required=["text", "text_type", "items", "confidence"],
 )
 
@@ -21,6 +21,7 @@
     convert_to_plus_code,
     extract_text_from_camera,
     get_accessibility_info,
+    get_emergency_help,
     get_location_info,
     get_walking_directions,
     google_search,
@@ -183,13 +184,28 @@ def forget_recent_memory(minutes: int = 30) -> dict:
 
 **"Is this area accessible?"** → ``get_accessibility_info`` (tactile paving, ramps, signals)
 
+**"Help!" / "Emergency" / "I need help" / "Call 911"** → ``get_emergency_help`` — \
+INTERRUPT priority, always override. Provide emergency number, nearest services, \
+and location code immediately. Do not hesitate or ask clarifying questions first.
+
 **General knowledge / fact check** → ``google_search``
 
 **Navigation results** include slope warnings (>8% = ADA threshold) and accessibility info.
 
 ### Automatic Injections (No Tool Call Needed)
 - ``[VISION ANALYSIS]``: Scene understanding — integrate naturally into speech.
+  - When currency is detected, read the denomination clearly and naturally: \
+"That looks like a twenty-dollar bill" (not "Currency detected: USD 20").
+  - When emotions are detected, weave them in warmly: "The person across from \
+you seems to be smiling" (not "Expression: smiling detected").
+  - When light conditions change, mention it during transitions: "You've stepped \
+into a brightly lit space" or "It's quite dim in here".
+  - When motion_direction is "approaching" for vehicles, alert with urgency.
 - ``[OCR RESULT]``: Safety-critical text detected — read aloud when relevant.
+  - For medication labels: read drug name, dosage, and warnings clearly.
+  - For receipts: summarize store, item count, and total.
+  - For food packaging: always read allergens first (safety-critical), then calories.
+  - For business cards: read name, title, and contact info.
 - ``[FACE ID]``: Recognized faces — weave names naturally: "David is sitting across \
 from you" (never "Face recognized: David").
 Do NOT mention analysis systems by name.
@@ -234,6 +250,7 @@ def forget_recent_memory(minutes: int = 30) -> dict:
     "get_accessibility_info": get_accessibility_info,
     "maps_query": maps_query,
     "extract_text_from_camera": extract_text_from_camera,
+    "get_emergency_help": get_emergency_help,
     "preload_memory": preload_memory,
     "remember_entity": remember_entity,
     "what_do_you_remember": what_do_you_remember,
 
@@ -52,11 +52,16 @@
 
 ONLY report immediate physical hazards visible in this image:
 - Stairs, steps, drop-offs, curbs
-- Approaching vehicles or cyclists
+- Approaching vehicles or cyclists — flag with urgency, note direction of \
+approach (e.g., "vehicle approaching from 3 o'clock")
 - Obstacles in the walking path (poles, furniture, construction)
 - Wet/slippery surfaces, uneven ground
 - Low-hanging objects at head height (tree branches, awnings, scaffolding)
 - Overhead obstructions that a cane would not detect
+- Currency or payment-related items in the walking path (dropped money, wallet)
+
+For moving objects (vehicles, cyclists, people), indicate motion direction \
+using the motion_direction field: "approaching", "receding", or "crossing".
 
 Format: "[hazard] at [clock position], [distance estimate]"
 Examples: "Step down at 12 o'clock, 1 meter" / "Low branch at 11 o'clock, head height"
@@ -79,6 +84,12 @@
 3. Signage and wayfinding: readable signs, door numbers, directions.
 4. People: approximate count and proximity (not appearance descriptions).
 5. Key landmarks: counters, elevators, escalators, seating areas.
+6. Currency: identify any visible banknotes or coins. Report denomination \
+and count. If multiple bills/coins, estimate the total.
+7. Emotions: note obvious facial expressions (smiling, frowning, waving, \
+concerned) for people within 3 meters. Use the emotions field.
+
+For moving objects, set motion_direction: "approaching", "receding", or "crossing".
 
 Write the scene description as a natural spoken paragraph, not a bulleted list.
 Use clock positions for spatial references.
@@ -96,12 +107,22 @@
 Provide a comprehensive description as a natural, flowing narrative:
 1. SAFETY: Any hazards (always first priority).
 2. Spatial layout: full description of the space, dimensions, and organization.
-3. People: count, approximate positions, expressions, activities.
+3. People: count, approximate positions, detailed expressions and activities. \
+Use the emotions field for each person: note expression (smiling, laughing, \
+focused, concerned, puzzled) and what they appear to be doing.
 4. Text: all readable text (signs, menus, labels, screens).
 5. Objects: notable items, their positions and material qualities.
-6. Atmosphere: lighting quality (warm, fluorescent, natural), ambient energy \
-(quiet, bustling, peaceful), textures and surfaces, sounds you might infer \
-(traffic hum, conversation murmur, birdsong).
+6. Atmosphere and light: report the light_level field — describe lighting \
+quality in detail (warm incandescent, cool fluorescent, bright daylight, \
+dim indoor, dark, specific light sources like desk lamps or overhead fixtures). \
+Note ambient energy (quiet, bustling, peaceful), textures and surfaces, \
+sounds you might infer (traffic hum, conversation murmur, birdsong).
+7. Currency: identify any visible banknotes or coins with denomination, \
+country of origin if identifiable, and count. Use the currency_detected field.
+8. QR codes / barcodes: note presence and describe any readable content \
+or context clues about what the code links to.
+
+For moving objects, set motion_direction: "approaching", "receding", or "crossing".
 
 Use sensory language: "warm light filtering through large windows" rather than \
 "well-lit room". Describe textures, temperatures, and spatial feelings.
@@ -191,10 +212,44 @@
                         type=types.Type.STRING,
                         description="safety, navigation, interaction, or background.",
                     ),
+                    "motion_direction": types.Schema(
+                        type=types.Type.STRING,
+                        nullable=True,
+                        description="For moving objects: approaching, receding, or crossing.",
+                    ),
                 },
             ),
             description="Structured spatial map of objects with clock positions and distances.",
         ),
+        "light_level": types.Schema(
+            type=types.Type.STRING,
+            nullable=True,
+            description="Lighting quality: bright_daylight, overcast, dim_indoor, dark, fluorescent, warm_ambient, etc.",
+        ),
+        "emotions": types.Schema(
+            type=types.Type.ARRAY,
+            nullable=True,
+            items=types.Schema(
+                type=types.Type.OBJECT,
+                properties={
+                    "person_position": types.Schema(
+                        type=types.Type.STRING,
+                        description="Clock position and distance of the person, e.g. '2 o'clock, 2m'.",
+                    ),
+                    "expression": types.Schema(
+                        type=types.Type.STRING,
+                        description="Observed facial expression: smiling, frowning, laughing, concerned, neutral, etc.",
+                    ),
+                },
+            ),
+            description="Detected facial expressions for nearby people.",
+        ),
+        "currency_detected": types.Schema(
+            type=types.Type.ARRAY,
+            nullable=True,
+            items=types.Schema(type=types.Type.STRING),
+            description="Identified currency: denomination and count, e.g. 'US $20 bill', '2 euro coins'.",
+        ),
     },
     required=[
         "safety_warnings",
 
@@ -16,9 +16,10 @@
 class SpatialChange:
     """A detected change between consecutive vision frames."""
 
-    change_type: str  # "new_person_approaching", "layout_change", "hazard_appeared", "person_left"
+    change_type: str  # "new_person_approaching", "layout_change", "hazard_appeared", "person_left", "vehicle_approaching", "sudden_obstacle", "person_very_close"
     severity: str  # "safety", "significant", "minor"
     details: str
+    urgency: str = "awareness"  # "immediate" (within_reach), "approaching" (1-2m), "awareness" (3m+)
 
 
 class SpatialChangeDetector:
@@ -100,6 +101,81 @@ def detect(
                         details=f"Scene composition changed ({len(prev_labels)} → {len(curr_labels)} objects, {overlap:.0%} overlap)",
                     ))
 
+        # Rule 5: Approaching vehicle — a vehicle within 2m that is marked "approaching", or that is close when the previous frame's vehicles were all farther away
+        prev_vehicles = _extract_objects_by_label(previous.get("spatial_objects", []), "vehicle")
+        curr_vehicles = _extract_objects_by_label(current.get("spatial_objects", []), "vehicle")
+        for v in curr_vehicles:
+            dist = v.get("distance_estimate", "")
+            motion = v.get("motion_direction", "")
+            clock = v.get("clock_position", "")
+            is_close = dist in ("within_reach", "1m", "2m")
+            is_approaching = motion == "approaching"
+            # True only when the previous frame had vehicles and none of them was within 2m (vehicles are not matched individually across frames)
+            was_farther = not any(
+                pv.get("distance_estimate", "") in ("within_reach", "1m", "2m")
+                for pv in prev_vehicles
+            ) if prev_vehicles else False
+            if is_close and (is_approaching or was_farther):
+                urgency = "immediate" if dist == "within_reach" else "approaching"
+                clock_str = f" from {clock} o'clock" if clock else ""
+                changes.append(SpatialChange(
+                    change_type="vehicle_approaching",
+                    severity="safety",
+                    details=f"Vehicle approaching{clock_str}, {dist}",
+                    urgency=urgency,
+                ))
+
+        # Rule 6: Sudden obstacle in path — new object at 11-1 o'clock within 2m
+        prev_obj_keys = {
+            (o.get("label", ""), o.get("clock_position"))
+            for o in previous.get("spatial_objects", [])
+            if isinstance(o, dict)
+        }
+        for obj in current.get("spatial_objects", []):
+            if not isinstance(obj, dict):
+                continue
+            label = obj.get("label", "")
+            clock = obj.get("clock_position")
+            dist = obj.get("distance_estimate", "")
+            salience = obj.get("salience", "")
+            obj_key = (label, clock)
+            if (
+                obj_key not in prev_obj_keys
+                and clock in (11, 12, 1)
+                and dist in ("within_reach", "1m", "2m")
+                and salience in ("safety", "navigation")
+                and label not in ("person",)  # people handled by Rule 2/7
+            ):
+                urgency = "immediate" if dist == "within_reach" else "approaching"
+                changes.append(SpatialChange(
+                    change_type="sudden_obstacle",
+                    severity="safety",
+                    details=f"{label} appeared at {clock} o'clock, {dist}",
+                    urgency=urgency,
+                ))
+
+        # Rule 7: Person very close — person at within_reach distance
+        for obj in current.get("spatial_objects", []):
+            if not isinstance(obj, dict):
+                continue
+            if obj.get("label") == "person" and obj.get("distance_estimate") == "within_reach":
+                clock = obj.get("clock_position", "")
+                # Only flag if no person at all was within_reach in the previous frame (people are not tracked individually)
+                was_close = any(
+                    isinstance(po, dict)
+                    and po.get("label") == "person"
+                    and po.get("distance_estimate") == "within_reach"
+                    for po in previous.get("spatial_objects", [])
+                )
+                if not was_close:
+                    clock_str = f" at {clock} o'clock" if clock else ""
+                    changes.append(SpatialChange(
+                        change_type="person_very_close",
+                        severity="safety",
+                        details=f"Person very close{clock_str}",
+                        urgency="immediate",
+                    ))
+
         # Sort by severity: safety > significant > minor
         severity_order = {"safety": 0, "significant": 1, "minor": 2}
         changes.sort(key=lambda c: severity_order.get(c.severity, 3))
@@ -113,3 +189,11 @@ def _extract_labels(spatial_objects: list) -> set[str]:
         if isinstance(obj, dict) and obj.get("label"):
             labels.add(obj["label"])
     return labels
+
+
+def _extract_objects_by_label(spatial_objects: list, label: str) -> list[dict]:
+    """Return, in input order, every dict in spatial_objects whose "label" equals label; non-dict entries are skipped."""
+    return [
+        obj for obj in spatial_objects
+        if isinstance(obj, dict) and obj.get("label") == label
+    ]