Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions 05_src/assignment_chat/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""Gradio entrypoint and high-level request router for TripSmith."""

import gradio as gr

from services.api_service import handle_weather_query, is_weather_query
from services.guardrails import check_guardrails
from services.semantic_service import handle_semantic_query
from services.tools_service import handle_tools_query, is_tools_query


def process_message(message: str, history: list[dict]) -> str:
    """Route one chat message to the appropriate TripSmith service.

    Guardrails run first; when they block the message, their refusal
    text is returned and no handler executes.  Otherwise the message is
    dispatched by intent (weather, then tools) and finally falls back
    to semantic knowledge-base QA.
    """
    # A truthy guardrail result is the refusal text itself.
    refusal = check_guardrails(message)
    if refusal:
        return refusal

    # Intent predicates paired with their handlers, checked in order.
    routes = (
        (is_weather_query, handle_weather_query),
        (is_tools_query, handle_tools_query),
    )
    for matches, handler in routes:
        if matches(message):
            return handler(message, history=history)

    # Default path: semantic retrieval over the travel knowledge base.
    return handle_semantic_query(message, history=history)


# Chat UI wired to the router above. type="messages" makes `history`
# a list of OpenAI-style {"role": ..., "content": ...} dicts, matching
# the `list[dict]` annotation on process_message.
interface = gr.ChatInterface(
    fn=process_message,
    type="messages",
    title="TripSmith: Travel Planner AI",
    description="Pragmatic travel planning with weather, knowledge search, and function tools.",
)


# Start the Gradio server only when run as a script, not on import.
if __name__ == "__main__":
    interface.launch()
3 changes: 3 additions & 0 deletions 05_src/assignment_chat/chroma_store/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
*
!.gitignore
!.gitkeep
19 changes: 19 additions & 0 deletions 05_src/assignment_chat/data/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Dataset Notes

`travel_knowledge.jsonl` is generated from Wikivoyage summaries using:

- script: `build_wikivoyage_dataset.py`
- API endpoint pattern: `https://en.wikivoyage.org/api/rest_v1/page/summary/<Destination>`

Each record stores:

- destination name
- short overview text
- source URL
- license metadata (`CC BY-SA 4.0`)

To rebuild the dataset:

```bash
python build_wikivoyage_dataset.py
```
162 changes: 162 additions & 0 deletions 05_src/assignment_chat/data/build_wikivoyage_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
"""Build a compact travel knowledge JSONL dataset from Wikivoyage summaries."""

from __future__ import annotations

import json
import re
import sys
import urllib.parse
import urllib.request
from pathlib import Path


# Identifies this scraper to Wikimedia, per their API etiquette guidelines.
USER_AGENT = "TripSmithDatasetBuilder/1.0 (assignment-2)"
# REST endpoint that returns a JSON summary for a single article title.
BASE_URL = "https://en.wikivoyage.org/api/rest_v1/page/summary/"
# Output lives next to this script so the semantic service can locate it.
OUTPUT_FILE = Path(__file__).resolve().parent / "travel_knowledge.jsonl"

# Broad destination coverage so semantic search handles common travel questions.
# Each name must match a Wikivoyage article title (spaces become underscores
# when the URL is built in _summary_for_destination).
DESTINATIONS = [
    "Tokyo",
    "Kyoto",
    "Osaka",
    "Sapporo",
    "Seoul",
    "Busan",
    "Bangkok",
    "Chiang Mai",
    "Singapore",
    "Hong Kong",
    "Taipei",
    "Beijing",
    "Shanghai",
    "Hanoi",
    "Ho Chi Minh City",
    "Bali",
    "Jakarta",
    "Kuala Lumpur",
    "Manila",
    "Delhi",
    "Mumbai",
    "Istanbul",
    "Dubai",
    "Cairo",
    "Marrakesh",
    "Cape Town",
    "Nairobi",
    "Athens",
    "Rome",
    "Milan",
    "Venice",
    "Florence",
    "Barcelona",
    "Madrid",
    "Lisbon",
    "Porto",
    "Paris",
    "London",
    "Dublin",
    "Amsterdam",
    "Brussels",
    "Berlin",
    "Prague",
    "Vienna",
    "Budapest",
    "Zurich",
    "Reykjavik",
    "Copenhagen",
    "Stockholm",
    "Oslo",
    "Helsinki",
    "Warsaw",
    "Edinburgh",
    "New York City",
    "Los Angeles",
    "San Francisco",
    "Chicago",
    "Miami",
    "Vancouver",
    "Toronto",
    "Montreal",
    "Mexico City",
    "Cancun",
    "Havana",
    "San Juan",
    "Bogota",
    "Lima",
    "Santiago",
    "Buenos Aires",
    "Rio de Janeiro",
    "Sao Paulo",
    "Sydney",
    "Melbourne",
    "Auckland",
]


def _clean_text(text: str) -> str:
"""Normalize whitespace so stored passages are prompt-friendly."""
text = re.sub(r"\s+", " ", text or "").strip()
return text


def _summary_for_destination(destination: str) -> str:
    """Fetch summary text for a destination from the Wikivoyage REST API.

    Prefers the "extract" field (full lead paragraph); falls back to the
    one-line "description" field, which may itself be empty.
    """
    slug = urllib.parse.quote(destination.replace(" ", "_"))
    request = urllib.request.Request(
        BASE_URL + slug,
        headers={"User-Agent": USER_AGENT, "Accept": "application/json"},
        method="GET",
    )
    with urllib.request.urlopen(request, timeout=25) as response:
        body = response.read().decode("utf-8", errors="replace")
    payload = json.loads(body)

    # Return the first non-empty field in preference order.
    for field in ("extract", "description"):
        cleaned = _clean_text(payload.get(field, ""))
        if cleaned:
            return cleaned
    return ""


def build_dataset() -> list[dict]:
    """Create record list with stable ids and source attribution fields.

    Fetching is best-effort: a destination whose download fails or whose
    summary is empty is skipped so one flaky page cannot abort the whole
    build.  Each skip is reported on stderr instead of being silently
    swallowed, so missing destinations are visible in the build log.

    Returns:
        One dict per successfully fetched destination with a stable
        ``wv_NNNN`` id (gaps appear where destinations were skipped),
        source URL, and CC BY-SA 4.0 license metadata.
    """
    records: list[dict] = []
    for idx, destination in enumerate(DESTINATIONS, start=1):
        try:
            text = _summary_for_destination(destination)
        except Exception as exc:  # network/HTTP/JSON errors: skip, but say why
            print(f"warning: skipping {destination!r}: {exc}", file=sys.stderr)
            continue
        if not text:
            print(f"warning: no summary text for {destination!r}", file=sys.stderr)
            continue

        title_slug = destination.replace(" ", "_")
        records.append(
            {
                "id": f"wv_{idx:04d}",
                "destination": destination,
                "topic": "overview",
                "source": "Wikivoyage",
                "source_url": f"https://en.wikivoyage.org/wiki/{title_slug}",
                "license": "CC BY-SA 4.0",
                "text": text,
            }
        )
    return records


def write_jsonl(records: list[dict]) -> None:
    """Persist records as UTF-8 JSONL for semantic service ingestion.

    One JSON object per line, ASCII-escaped, with a trailing newline.
    """
    serialized = "\n".join(json.dumps(record, ensure_ascii=True) for record in records)
    OUTPUT_FILE.write_text(serialized + "\n", encoding="utf-8")


def main() -> None:
    """CLI entrypoint used to refresh dataset before submission.

    Raises:
        RuntimeError: if no destination could be downloaded at all,
            so an empty dataset is never written over a good one.
    """
    dataset = build_dataset()
    if not dataset:
        raise RuntimeError("No records were downloaded from Wikivoyage.")
    write_jsonl(dataset)
    print(f"Wrote {len(dataset)} records to {OUTPUT_FILE}")


# Run the fetch only when executed as a script, so importing this module
# (e.g. for its DESTINATIONS list) triggers no network traffic.
if __name__ == "__main__":
    main()
Loading