Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
771 changes: 753 additions & 18 deletions 02_activities/assignment_1.ipynb

Large diffs are not rendered by default.

6 changes: 0 additions & 6 deletions 05_src/.secrets.template

This file was deleted.

8 changes: 8 additions & 0 deletions 05_src/assignment_chat/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# assignment_chat package init: load configuration from .env/.secrets
# (resolved relative to the current working directory) at import time.
import os
from dotenv import load_dotenv

load_dotenv(".env")
load_dotenv(".secrets")

# Ensure CHROMA_OPENAI_API_KEY is set globally.
# Guard against a missing API_GATEWAY_KEY: assigning None into os.environ
# raises "TypeError: str expected, not NoneType" at import time. Leave the
# variable unset instead and let consumers that actually need the key
# report its absence with a clearer error.
if "CHROMA_OPENAI_API_KEY" not in os.environ:
    _gateway_key = os.getenv("API_GATEWAY_KEY")
    if _gateway_key is not None:
        os.environ["CHROMA_OPENAI_API_KEY"] = _gateway_key
73 changes: 73 additions & 0 deletions 05_src/assignment_chat/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# app.py
#
# Gradio front-end for the course chat assistant: makes the package root
# importable, loads environment secrets, and builds the LangGraph pipeline
# once at import time.
import sys
import os
from pathlib import Path


# Make the parent directory importable so `assignment_chat.*` resolves when
# this file is run directly as a script. Must happen BEFORE the
# `assignment_chat.main` import below.
BASE_DIR = Path(__file__).resolve().parent
ROOT_DIR = BASE_DIR.parent
if str(ROOT_DIR) not in sys.path:
    sys.path.insert(0, str(ROOT_DIR))

from assignment_chat.main import get_graph
from langchain_core.messages import HumanMessage, AIMessage
import gradio as gr
from dotenv import load_dotenv
from utils.logger import get_logger

_logs = get_logger(__name__)

# -----------------------------
# Load environment secrets
# -----------------------------
# NOTE(review): the package __init__ also loads .env/.secrets relative to the
# CWD; these calls load them relative to this file's directory — confirm the
# duplication is intentional.
load_dotenv(BASE_DIR / ".secrets")
load_dotenv(BASE_DIR / ".env")

# -----------------------------
# Initialize LLM graph
# -----------------------------
# Build the graph once at import; on failure keep the app importable and
# report the error per-request instead of crashing at startup.
try:
    llm = get_graph()
except Exception as e:
    _logs.error(f"Failed to initialize LLM graph: {e}")
    llm = None  # fallback to prevent crashes

# -----------------------------
# Chat callback for Gradio
# -----------------------------
def course_chat(message: str, history: list[dict] | None = None) -> str:
    """Gradio chat callback: replay history into LangChain messages and invoke the graph.

    Args:
        message: The latest user message.
        history: Prior turns as Gradio ``messages``-style dicts with
            ``role``/``content`` keys. ``None`` is treated as an empty
            history. (Annotation fixed: the default is ``None``, so the
            type must be ``list[dict] | None``.)

    Returns:
        The assistant's reply text, or a human-readable error string when
        the graph is unavailable or invocation fails.
    """
    if history is None:
        history = []

    langchain_messages = []
    n = 0  # counts every prior turn (user and assistant); passed as llm_calls
    _logs.debug(f"History: {history}")

    for msg in history:
        if msg.get('role') == 'user':
            langchain_messages.append(HumanMessage(content=msg['content']))
        elif msg.get('role') == 'assistant':
            langchain_messages.append(AIMessage(content=msg['content']))
        n += 1

    langchain_messages.append(HumanMessage(content=message))
    state = {"messages": langchain_messages, "llm_calls": n}

    try:
        if llm:
            response = llm.invoke(state)
            # The graph returns a state dict; the reply is the last message.
            return response['messages'][-1].content
        else:
            return "LLM not initialized. Cannot generate a response."
    except Exception as e:
        _logs.error(f"LLM invocation failed: {e}")
        return "Error: could not generate a response."

# -----------------------------
# Launch Gradio chat interface
# -----------------------------
# Created at module level so tools that import this module (e.g. `gradio`
# CLI reload) can find the interface; launched only when run as a script.
chat = gr.ChatInterface(fn=course_chat)

if __name__ == "__main__":
    _logs.info(f"Starting Course Chat App with CHROMA_MODE={os.getenv('CHROMA_MODE', 'undefined')}")
    chat.launch()
81 changes: 81 additions & 0 deletions 05_src/assignment_chat/build_music_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#build_music_db.py
#
# One-shot build script: (re)creates the Chroma "music_reviews" collection
# from music_docs.json, embedding reviews with a local SentenceTransformer.

import os
import json
from pathlib import Path
import chromadb
from chromadb.utils import embedding_functions
from dotenv import load_dotenv

# -----------------------------
# Load environment variables
# -----------------------------
BASE_DIR = Path(__file__).resolve().parents[1]  # 05_src folder
load_dotenv(BASE_DIR / ".env")
load_dotenv(BASE_DIR / ".secrets")

# Fail fast if the gateway key is missing.
api_key = os.getenv("API_GATEWAY_KEY")
if not api_key:
    raise ValueError("API_GATEWAY_KEY not found in environment.")

# NOTE(review): CHROMA_OPENAI_API_KEY appears unused below, since embeddings
# come from a local SentenceTransformer model — confirm whether this is needed.
os.environ["CHROMA_OPENAI_API_KEY"] = api_key

# -----------------------------
# Initialize Chroma client
# -----------------------------
# Persist the DB next to this script.
CHROMA_DB_PATH = Path(__file__).parent / "chroma_db"
CHROMA_DB_PATH.mkdir(exist_ok=True)

client = chromadb.PersistentClient(path=str(CHROMA_DB_PATH))

# Local embedding model; no network/API key required for embedding.
embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="all-MiniLM-L6-v2"
)

# -----------------------------
# Recreate collection safely
# -----------------------------
COLLECTION_NAME = "music_reviews"

# Delete existing collection if it exists to avoid duplicates
existing_collections = [c.name for c in client.list_collections()]
if COLLECTION_NAME in existing_collections:
    client.delete_collection(COLLECTION_NAME)

collection = client.create_collection(
    name=COLLECTION_NAME,
    embedding_function=embedding_function
)

# -----------------------------
# Load dataset
# -----------------------------
DATA_FILE = Path(__file__).parent / "music_docs.json"
if not DATA_FILE.exists():
    raise FileNotFoundError(f"{DATA_FILE} not found. Create a small dataset first.")

with open(DATA_FILE, "r", encoding="utf-8") as f:
    docs = json.load(f)

# Each record is expected to carry: id, review, artist, title, year, score.
documents = [item["review"] for item in docs]
ids = [item["id"] for item in docs]
metadatas = [
    {
        "artist": item["artist"],
        "title": item["title"],
        "year": item["year"],
        "score": item["score"]
    }
    for item in docs
]

# -----------------------------
# Add documents to collection
# -----------------------------
collection.add(
    documents=documents,
    ids=ids,
    metadatas=metadatas
)

print(" Chroma DB built successfully.")
73 changes: 73 additions & 0 deletions 05_src/assignment_chat/guardrails.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
"""
guardrails.py

This module defines a guardrails node for the LangGraph workflow.
It blocks:
1. Restricted content topics (e.g., animals, zodiac, celebrities)
2. Prompt injection attempts targeting system instructions

If a violation is detected, it appends a safe AI response and stops
the unsafe content from reaching the LLM.
"""

from langchain_core.messages import AIMessage
from langgraph.graph import MessagesState


import re

# Topics that are not allowed to be discussed
FORBIDDEN_TOPICS = [
    "cat", "dog",
    "horoscope", "zodiac",
    "aries", "taurus", "gemini", "cancer", "leo",
    "virgo", "libra", "scorpio", "sagittarius",
    "capricorn", "aquarius", "pisces",
    "taylor swift", "taylor", "swift"
]

# Phrases commonly used in prompt injection attempts
FORBIDDEN_META = [
    "system prompt",
    "ignore previous instructions",
    "reveal instructions",
]


def _contains_term(text: str, terms: list[str]) -> bool:
    """Return True if *text* contains any of *terms* as a whole word/phrase.

    Word-boundary matching (with an optional trailing "s" for plurals)
    fixes false positives of the naive substring test: "cat" must not
    fire inside "scatter" or "education", nor "leo" inside "napoleon",
    nor "aries" inside "canaries".
    """
    return any(
        re.search(r"\b" + re.escape(term) + r"s?\b", text) is not None
        for term in terms
    )


def guardrails(state: "MessagesState"):
    """
    Guardrails node that runs BEFORE the LLM.
    It inspects the most recent user message and:
        - Blocks restricted topics
        - Blocks attempts to access system-level instructions
    If blocked, it appends a safe AI response to the message history.
    If safe, it returns the state unchanged.
    """

    # Get the most recent user message and normalize it
    last_message = state["messages"][-1].content.lower()

    # ---- Topic Blocking ----
    if _contains_term(last_message, FORBIDDEN_TOPICS):
        return {
            # Preserve full conversation history
            "messages": state["messages"] + [
                AIMessage(
                    content="This topic is restricted and cannot be discussed."
                )
            ]
        }

    # ---- Prompt Injection Protection ----
    if _contains_term(last_message, FORBIDDEN_META):
        return {
            # Preserve full conversation history
            "messages": state["messages"] + [
                AIMessage(
                    content="Access to system-level instructions is denied."
                )
            ]
        }

    # If no violations are found, pass state forward unchanged
    return state
43 changes: 43 additions & 0 deletions 05_src/assignment_chat/init_chroma.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# init_chroma.py
import os
from chromadb import Client
from chromadb.config import Settings

def get_client():
    """Build a Chroma client according to the CHROMA_MODE env variable.

    "local"        -> embedded DuckDB+Parquet client persisted in ./chroma_data
    anything else  -> REST client against localhost:8000
                      (default "docker", safe for grading)
    """
    if os.getenv("CHROMA_MODE", "docker") == "local":
        print("Using LOCAL DuckDB mode")
        persist_dir = os.path.join(os.getcwd(), "chroma_data")
        os.makedirs(persist_dir, exist_ok=True)
        settings = Settings(
            chroma_db_impl="duckdb+parquet",
            persist_directory=persist_dir
        )
    else:
        print("Using DOCKER REST mode")
        settings = Settings(
            chroma_api_impl="rest",
            chroma_server_host="localhost",
            chroma_server_http_port=8000
        )
    return Client(settings)

def main():
    """Idempotently ensure the 'pitchfork_reviews' collection exists."""
    client = get_client()

    collection_name = "pitchfork_reviews"

    # Only create the collection when it is not already present.
    if collection_name in [c.name for c in client.list_collections()]:
        print(f"Collection '{collection_name}' already exists.")
    else:
        client.create_collection(name=collection_name)
        print(f"Collection '{collection_name}' created.")

    print("Chroma setup complete.")

if __name__ == "__main__":
    main()
Loading