Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions 05_src/assignment_chat/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""Gradio entrypoint and high-level request router for TripSmith."""

import gradio as gr

from services.api_service import handle_weather_query, is_weather_query
from services.guardrails import check_guardrails
from services.semantic_service import handle_semantic_query
from services.tools_service import handle_tools_query, is_tools_query


def process_message(message: str, history: list[dict]) -> str:
    """Route one chat message to the appropriate TripSmith service.

    Guardrails run first; when they block the message, their refusal
    text is returned and no handler executes.  Otherwise the message is
    dispatched by intent (weather, then tools) and finally falls back
    to semantic knowledge-base QA.
    """
    # A truthy guardrail result is the refusal text itself.
    refusal = check_guardrails(message)
    if refusal:
        return refusal

    # Intent predicates paired with their handlers, checked in order.
    routes = (
        (is_weather_query, handle_weather_query),
        (is_tools_query, handle_tools_query),
    )
    for matches, handler in routes:
        if matches(message):
            return handler(message, history=history)

    # Default path: semantic retrieval over the travel knowledge base.
    return handle_semantic_query(message, history=history)


# Chat UI wired to the router above. type="messages" makes `history`
# a list of OpenAI-style {"role": ..., "content": ...} dicts, matching
# the `list[dict]` annotation on process_message.
interface = gr.ChatInterface(
    fn=process_message,
    type="messages",
    title="TripSmith: Travel Planner AI",
    description="Pragmatic travel planning with weather, knowledge search, and function tools.",
)


# Start the Gradio server only when run as a script, not on import.
if __name__ == "__main__":
    interface.launch()
3 changes: 3 additions & 0 deletions 05_src/assignment_chat/chroma_store/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
*
!.gitignore
!.gitkeep
19 changes: 19 additions & 0 deletions 05_src/assignment_chat/data/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Dataset Notes

`travel_knowledge.jsonl` is generated from Wikivoyage summaries using:

- script: `build_wikivoyage_dataset.py`
- API endpoint pattern: `https://en.wikivoyage.org/api/rest_v1/page/summary/<Destination>`

Each record stores:

- destination name
- short overview text
- source URL
- license metadata (`CC BY-SA 4.0`)

To rebuild the dataset:

```bash
python build_wikivoyage_dataset.py
```
162 changes: 162 additions & 0 deletions 05_src/assignment_chat/data/build_wikivoyage_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
"""Build a compact travel knowledge JSONL dataset from Wikivoyage summaries."""

from __future__ import annotations

import json
import re
import sys
import urllib.parse
import urllib.request
from pathlib import Path


# Identifies this scraper to Wikimedia, per their API etiquette guidelines.
USER_AGENT = "TripSmithDatasetBuilder/1.0 (assignment-2)"
# REST endpoint that returns a JSON summary for a single article title.
BASE_URL = "https://en.wikivoyage.org/api/rest_v1/page/summary/"
# Output lives next to this script so the semantic service can locate it.
OUTPUT_FILE = Path(__file__).resolve().parent / "travel_knowledge.jsonl"

# Broad destination coverage so semantic search handles common travel questions.
# Each name must match a Wikivoyage article title (spaces become underscores
# when the URL is built in _summary_for_destination).
DESTINATIONS = [
    "Tokyo",
    "Kyoto",
    "Osaka",
    "Sapporo",
    "Seoul",
    "Busan",
    "Bangkok",
    "Chiang Mai",
    "Singapore",
    "Hong Kong",
    "Taipei",
    "Beijing",
    "Shanghai",
    "Hanoi",
    "Ho Chi Minh City",
    "Bali",
    "Jakarta",
    "Kuala Lumpur",
    "Manila",
    "Delhi",
    "Mumbai",
    "Istanbul",
    "Dubai",
    "Cairo",
    "Marrakesh",
    "Cape Town",
    "Nairobi",
    "Athens",
    "Rome",
    "Milan",
    "Venice",
    "Florence",
    "Barcelona",
    "Madrid",
    "Lisbon",
    "Porto",
    "Paris",
    "London",
    "Dublin",
    "Amsterdam",
    "Brussels",
    "Berlin",
    "Prague",
    "Vienna",
    "Budapest",
    "Zurich",
    "Reykjavik",
    "Copenhagen",
    "Stockholm",
    "Oslo",
    "Helsinki",
    "Warsaw",
    "Edinburgh",
    "New York City",
    "Los Angeles",
    "San Francisco",
    "Chicago",
    "Miami",
    "Vancouver",
    "Toronto",
    "Montreal",
    "Mexico City",
    "Cancun",
    "Havana",
    "San Juan",
    "Bogota",
    "Lima",
    "Santiago",
    "Buenos Aires",
    "Rio de Janeiro",
    "Sao Paulo",
    "Sydney",
    "Melbourne",
    "Auckland",
]


def _clean_text(text: str) -> str:
"""Normalize whitespace so stored passages are prompt-friendly."""
text = re.sub(r"\s+", " ", text or "").strip()
return text


def _summary_for_destination(destination: str) -> str:
    """Fetch summary text for a destination from the Wikivoyage REST API.

    Prefers the "extract" field (full lead paragraph); falls back to the
    one-line "description" field, which may itself be empty.
    """
    slug = urllib.parse.quote(destination.replace(" ", "_"))
    request = urllib.request.Request(
        BASE_URL + slug,
        headers={"User-Agent": USER_AGENT, "Accept": "application/json"},
        method="GET",
    )
    with urllib.request.urlopen(request, timeout=25) as response:
        body = response.read().decode("utf-8", errors="replace")
    payload = json.loads(body)

    # Return the first non-empty field in preference order.
    for field in ("extract", "description"):
        cleaned = _clean_text(payload.get(field, ""))
        if cleaned:
            return cleaned
    return ""


def build_dataset() -> list[dict]:
    """Create record list with stable ids and source attribution fields.

    Fetching is best-effort: a destination whose download fails or whose
    summary is empty is skipped so one flaky page cannot abort the whole
    build.  Each skip is reported on stderr instead of being silently
    swallowed, so missing destinations are visible in the build log.

    Returns:
        One dict per successfully fetched destination with a stable
        ``wv_NNNN`` id (gaps appear where destinations were skipped),
        source URL, and CC BY-SA 4.0 license metadata.
    """
    records: list[dict] = []
    for idx, destination in enumerate(DESTINATIONS, start=1):
        try:
            text = _summary_for_destination(destination)
        except Exception as exc:  # network/HTTP/JSON errors: skip, but say why
            print(f"warning: skipping {destination!r}: {exc}", file=sys.stderr)
            continue
        if not text:
            print(f"warning: no summary text for {destination!r}", file=sys.stderr)
            continue

        title_slug = destination.replace(" ", "_")
        records.append(
            {
                "id": f"wv_{idx:04d}",
                "destination": destination,
                "topic": "overview",
                "source": "Wikivoyage",
                "source_url": f"https://en.wikivoyage.org/wiki/{title_slug}",
                "license": "CC BY-SA 4.0",
                "text": text,
            }
        )
    return records


def write_jsonl(records: list[dict]) -> None:
    """Persist records as UTF-8 JSONL for semantic service ingestion.

    One JSON object per line, ASCII-escaped, with a trailing newline.
    """
    serialized = "\n".join(json.dumps(record, ensure_ascii=True) for record in records)
    OUTPUT_FILE.write_text(serialized + "\n", encoding="utf-8")


def main() -> None:
    """CLI entrypoint used to refresh dataset before submission.

    Raises:
        RuntimeError: if no destination could be downloaded at all,
            so an empty dataset is never written over a good one.
    """
    dataset = build_dataset()
    if not dataset:
        raise RuntimeError("No records were downloaded from Wikivoyage.")
    write_jsonl(dataset)
    print(f"Wrote {len(dataset)} records to {OUTPUT_FILE}")


# Run the fetch only when executed as a script, so importing this module
# (e.g. for its DESTINATIONS list) triggers no network traffic.
if __name__ == "__main__":
    main()
Loading