Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
771 changes: 753 additions & 18 deletions 02_activities/assignment_1.ipynb

Large diffs are not rendered by default.

6 changes: 0 additions & 6 deletions 05_src/.secrets.template

This file was deleted.

8 changes: 8 additions & 0 deletions 05_src/assignment_chat/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# assignment_chat package init: load configuration from .env/.secrets
# (resolved relative to the current working directory) at import time.
import os
from dotenv import load_dotenv

load_dotenv(".env")
load_dotenv(".secrets")

# Ensure CHROMA_OPENAI_API_KEY is set globally.
# Guard against a missing API_GATEWAY_KEY: assigning None into os.environ
# raises "TypeError: str expected, not NoneType" at import time. Leave the
# variable unset instead and let consumers that actually need the key
# report its absence with a clearer error.
if "CHROMA_OPENAI_API_KEY" not in os.environ:
    _gateway_key = os.getenv("API_GATEWAY_KEY")
    if _gateway_key is not None:
        os.environ["CHROMA_OPENAI_API_KEY"] = _gateway_key
73 changes: 73 additions & 0 deletions 05_src/assignment_chat/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# app.py
#
# Gradio front-end for the course chat assistant: makes the package root
# importable, loads environment secrets, and builds the LangGraph pipeline
# once at import time.
import sys
import os
from pathlib import Path


# Make the parent directory importable so `assignment_chat.*` resolves when
# this file is run directly as a script. Must happen BEFORE the
# `assignment_chat.main` import below.
BASE_DIR = Path(__file__).resolve().parent
ROOT_DIR = BASE_DIR.parent
if str(ROOT_DIR) not in sys.path:
    sys.path.insert(0, str(ROOT_DIR))

from assignment_chat.main import get_graph
from langchain_core.messages import HumanMessage, AIMessage
import gradio as gr
from dotenv import load_dotenv
from utils.logger import get_logger

_logs = get_logger(__name__)

# -----------------------------
# Load environment secrets
# -----------------------------
# NOTE(review): the package __init__ also loads .env/.secrets relative to the
# CWD; these calls load them relative to this file's directory — confirm the
# duplication is intentional.
load_dotenv(BASE_DIR / ".secrets")
load_dotenv(BASE_DIR / ".env")

# -----------------------------
# Initialize LLM graph
# -----------------------------
# Build the graph once at import; on failure keep the app importable and
# report the error per-request instead of crashing at startup.
try:
    llm = get_graph()
except Exception as e:
    _logs.error(f"Failed to initialize LLM graph: {e}")
    llm = None  # fallback to prevent crashes

# -----------------------------
# Chat callback for Gradio
# -----------------------------
def course_chat(message: str, history: list[dict] | None = None) -> str:
    """Gradio chat callback: replay history into LangChain messages and invoke the graph.

    Args:
        message: The latest user message.
        history: Prior turns as Gradio ``messages``-style dicts with
            ``role``/``content`` keys. ``None`` is treated as an empty
            history. (Annotation fixed: the default is ``None``, so the
            type must be ``list[dict] | None``.)

    Returns:
        The assistant's reply text, or a human-readable error string when
        the graph is unavailable or invocation fails.
    """
    if history is None:
        history = []

    langchain_messages = []
    n = 0  # counts every prior turn (user and assistant); passed as llm_calls
    _logs.debug(f"History: {history}")

    for msg in history:
        if msg.get('role') == 'user':
            langchain_messages.append(HumanMessage(content=msg['content']))
        elif msg.get('role') == 'assistant':
            langchain_messages.append(AIMessage(content=msg['content']))
        n += 1

    langchain_messages.append(HumanMessage(content=message))
    state = {"messages": langchain_messages, "llm_calls": n}

    try:
        if llm:
            response = llm.invoke(state)
            # The graph returns a state dict; the reply is the last message.
            return response['messages'][-1].content
        else:
            return "LLM not initialized. Cannot generate a response."
    except Exception as e:
        _logs.error(f"LLM invocation failed: {e}")
        return "Error: could not generate a response."

# -----------------------------
# Launch Gradio chat interface
# -----------------------------
# Created at module level so tools that import this module (e.g. `gradio`
# CLI reload) can find the interface; launched only when run as a script.
chat = gr.ChatInterface(fn=course_chat)

if __name__ == "__main__":
    _logs.info(f"Starting Course Chat App with CHROMA_MODE={os.getenv('CHROMA_MODE', 'undefined')}")
    chat.launch()
81 changes: 81 additions & 0 deletions 05_src/assignment_chat/build_music_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#build_music_db.py
#
# One-shot build script: (re)creates the Chroma "music_reviews" collection
# from music_docs.json, embedding reviews with a local SentenceTransformer.

import os
import json
from pathlib import Path
import chromadb
from chromadb.utils import embedding_functions
from dotenv import load_dotenv

# -----------------------------
# Load environment variables
# -----------------------------
BASE_DIR = Path(__file__).resolve().parents[1]  # 05_src folder
load_dotenv(BASE_DIR / ".env")
load_dotenv(BASE_DIR / ".secrets")

# Fail fast if the gateway key is missing.
api_key = os.getenv("API_GATEWAY_KEY")
if not api_key:
    raise ValueError("API_GATEWAY_KEY not found in environment.")

# NOTE(review): CHROMA_OPENAI_API_KEY appears unused below, since embeddings
# come from a local SentenceTransformer model — confirm whether this is needed.
os.environ["CHROMA_OPENAI_API_KEY"] = api_key

# -----------------------------
# Initialize Chroma client
# -----------------------------
# Persist the DB next to this script.
CHROMA_DB_PATH = Path(__file__).parent / "chroma_db"
CHROMA_DB_PATH.mkdir(exist_ok=True)

client = chromadb.PersistentClient(path=str(CHROMA_DB_PATH))

# Local embedding model; no network/API key required for embedding.
embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="all-MiniLM-L6-v2"
)

# -----------------------------
# Recreate collection safely
# -----------------------------
COLLECTION_NAME = "music_reviews"

# Delete existing collection if it exists to avoid duplicates
existing_collections = [c.name for c in client.list_collections()]
if COLLECTION_NAME in existing_collections:
    client.delete_collection(COLLECTION_NAME)

collection = client.create_collection(
    name=COLLECTION_NAME,
    embedding_function=embedding_function
)

# -----------------------------
# Load dataset
# -----------------------------
DATA_FILE = Path(__file__).parent / "music_docs.json"
if not DATA_FILE.exists():
    raise FileNotFoundError(f"{DATA_FILE} not found. Create a small dataset first.")

with open(DATA_FILE, "r", encoding="utf-8") as f:
    docs = json.load(f)

# Each record is expected to carry: id, review, artist, title, year, score.
documents = [item["review"] for item in docs]
ids = [item["id"] for item in docs]
metadatas = [
    {
        "artist": item["artist"],
        "title": item["title"],
        "year": item["year"],
        "score": item["score"]
    }
    for item in docs
]

# -----------------------------
# Add documents to collection
# -----------------------------
collection.add(
    documents=documents,
    ids=ids,
    metadatas=metadatas
)

print(" Chroma DB built successfully.")
73 changes: 73 additions & 0 deletions 05_src/assignment_chat/guardrails.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
"""
guardrails.py

This module defines a guardrails node for the LangGraph workflow.
It blocks:
1. Restricted content topics (e.g., animals, zodiac, celebrities)
2. Prompt injection attempts targeting system instructions

If a violation is detected, it appends a safe AI response and stops
the unsafe content from reaching the LLM.
"""

from langchain_core.messages import AIMessage
from langgraph.graph import MessagesState


import re

# Topics that are not allowed to be discussed
FORBIDDEN_TOPICS = [
    "cat", "dog",
    "horoscope", "zodiac",
    "aries", "taurus", "gemini", "cancer", "leo",
    "virgo", "libra", "scorpio", "sagittarius",
    "capricorn", "aquarius", "pisces",
    "taylor swift", "taylor", "swift"
]

# Phrases commonly used in prompt injection attempts
FORBIDDEN_META = [
    "system prompt",
    "ignore previous instructions",
    "reveal instructions",
]


def _contains_term(text: str, terms: list[str]) -> bool:
    """Return True if *text* contains any of *terms* as a whole word/phrase.

    Word-boundary matching (with an optional trailing "s" for plurals)
    fixes false positives of the naive substring test: "cat" must not
    fire inside "scatter" or "education", nor "leo" inside "napoleon",
    nor "aries" inside "canaries".
    """
    return any(
        re.search(r"\b" + re.escape(term) + r"s?\b", text) is not None
        for term in terms
    )


def guardrails(state: "MessagesState"):
    """
    Guardrails node that runs BEFORE the LLM.
    It inspects the most recent user message and:
        - Blocks restricted topics
        - Blocks attempts to access system-level instructions
    If blocked, it appends a safe AI response to the message history.
    If safe, it returns the state unchanged.
    """

    # Get the most recent user message and normalize it
    last_message = state["messages"][-1].content.lower()

    # ---- Topic Blocking ----
    if _contains_term(last_message, FORBIDDEN_TOPICS):
        return {
            # Preserve full conversation history
            "messages": state["messages"] + [
                AIMessage(
                    content="This topic is restricted and cannot be discussed."
                )
            ]
        }

    # ---- Prompt Injection Protection ----
    if _contains_term(last_message, FORBIDDEN_META):
        return {
            # Preserve full conversation history
            "messages": state["messages"] + [
                AIMessage(
                    content="Access to system-level instructions is denied."
                )
            ]
        }

    # If no violations are found, pass state forward unchanged
    return state
43 changes: 43 additions & 0 deletions 05_src/assignment_chat/init_chroma.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# init_chroma.py
import os
from chromadb import Client
from chromadb.config import Settings

def get_client():
    """Build a Chroma client according to the CHROMA_MODE env variable.

    "local"        -> embedded DuckDB+Parquet client persisted in ./chroma_data
    anything else  -> REST client against localhost:8000
                      (default "docker", safe for grading)
    """
    if os.getenv("CHROMA_MODE", "docker") == "local":
        print("Using LOCAL DuckDB mode")
        persist_dir = os.path.join(os.getcwd(), "chroma_data")
        os.makedirs(persist_dir, exist_ok=True)
        settings = Settings(
            chroma_db_impl="duckdb+parquet",
            persist_directory=persist_dir
        )
    else:
        print("Using DOCKER REST mode")
        settings = Settings(
            chroma_api_impl="rest",
            chroma_server_host="localhost",
            chroma_server_http_port=8000
        )
    return Client(settings)

def main():
    """Idempotently ensure the 'pitchfork_reviews' collection exists."""
    client = get_client()

    collection_name = "pitchfork_reviews"

    # Only create the collection when it is not already present.
    if collection_name in [c.name for c in client.list_collections()]:
        print(f"Collection '{collection_name}' already exists.")
    else:
        client.create_collection(name=collection_name)
        print(f"Collection '{collection_name}' created.")

    print("Chroma setup complete.")

if __name__ == "__main__":
    main()
Loading