diff --git a/uc-0a/agents.md b/uc-0a/agents.md index cd4d882..7dcdd7d 100644 --- a/uc-0a/agents.md +++ b/uc-0a/agents.md @@ -1,27 +1,37 @@ -# agents.md — UC-0A Complaint Classifier -# INSTRUCTIONS: -# 1. Open your AI tool -# 2. Paste the full contents of uc-0a/README.md -# 3. Use this prompt: -# "Read this UC README. Using the R.I.C.E framework, generate an -# agents.md YAML with four fields: role, intent, context, enforcement. -# Enforcement must include every rule listed under -# 'Enforcement Rules Your agents.md Must Include'. -# Output only valid YAML." -# 4. Paste the output below +# agents.md — UC-0A Complaint Classifier (RICE Framework) role: > - [FILL IN] + City Operations Complaint Classifier Agent intent: > - [FILL IN] + Classify incoming municipal complaints by category, priority, and severity + to route them to appropriate departments and flag urgent safety issues for + immediate intervention. context: > - [FILL IN] + The City Operations team receives hundreds of complaints weekly covering: + potholes, flooding, streetlight failures, waste, noise, road/heritage damage, + heat hazards, drain blockages. Each complaint has a description and location. + Some involve urgent safety situations (injuries, children, schools, hospitals). + Staff depend on accurate, consistent classifications for dashboard reporting. enforcement: - - "[FILL IN: category enum rule]" - - "[FILL IN: severity keyword rule — list the keywords]" - - "[FILL IN: reason field rule]" - - "[FILL IN: ambiguity refusal rule]" - - "[FILL IN: no invented categories rule]" + - rule: "Taxonomy Constraint (Fixed Enum)" + description: "Category MUST be exactly one value from: Pothole, Flooding, Streetlight, Waste, Noise, Road Damage, Heritage Damage, Heat Hazard, Drain Blockage, Other. NEVER invent category names." 
+ keywords: "enum, fixed categories, no variations" + + - rule: "Severity Keyword Detection" + description: "Priority MUST be Urgent if description contains ANY of: injury, child, school, hospital, ambulance, fire, hazard, fell, collapse (case-insensitive). Otherwise Standard." + keywords: "injury, child, school, hospital, ambulance, fire, hazard, fell, collapse" + + - rule: "Justification Requirement" + description: "Every output row MUST include a reason field with exactly one sentence citing specific words from the original description. Extract key complaint element, do not repeat verbatim." + keywords: "reason, citation, specific words" + + - rule: "Ambiguity Handling" + description: "If category cannot be determined with confidence (vague/short/contradictory description), output category: Other and flag: NEEDS_REVIEW. Better to flag for review than confidently misclassify." + keywords: "ambiguity, NEEDS_REVIEW, confidence threshold" + + - rule: "No Hallucinated Sub-Categories" + description: "Never output sub-values like Pothole-Minor, Flooding-Severe, or Water-Related Damage. The category list is exhaustive." + keywords: "no sub-categories, exhaustive list" diff --git a/uc-0a/classifier.py b/uc-0a/classifier.py index 3f8fe55..b3ebcdc 100644 --- a/uc-0a/classifier.py +++ b/uc-0a/classifier.py @@ -1,31 +1,221 @@ """ UC-0A — Complaint Classifier -classifier.py — Starter file +classifier.py — RICE-constrained complaint classification -Build this using your AI coding tool: -1. Share agents.md, skills.md, and uc-0a/README.md -2. Ask the AI to implement this file -3. Run: python3 classifier.py --input ../data/city-test-files/test_pune.csv \ - --output results_pune.csv +Enforcement Rules: +1. Taxonomy Constraint: Category must be exactly one value from the allowed list +2. Severity Keyword Detection: Priority=Urgent if ANY severity keyword present +3. Justification Requirement: Every row must have a reason citing specific words +4. 
import argparse
import csv
import re
import sys

# Closed set of categories a complaint may be assigned to. Nothing outside
# this set may ever be written to the output.
ALLOWED_CATEGORIES = {
    "Pothole", "Flooding", "Streetlight", "Waste", "Noise",
    "Road Damage", "Heritage Damage", "Heat Hazard", "Drain Blockage", "Other",
}

# Any of these appearing in a description forces priority "Urgent".
SEVERITY_KEYWORDS = {
    "injury", "child", "school", "hospital", "ambulance", "fire", "hazard",
    "fell", "collapse",
}

# Trigger words per category. Order matters: on a tied score the category
# listed first wins.
_CATEGORY_KEYWORDS = {
    "Pothole": ("pothole", "hole", "pit", "crater"),
    "Flooding": ("flood", "water", "wet", "inundation",
                 "waterlogged", "waterlogging"),
    "Streetlight": ("streetlight", "street light", "light", "lamp", "dark",
                    "blackout"),
    "Waste": ("garbage", "waste", "trash", "litter", "debris"),
    "Noise": ("noise", "sound", "loud", "loudspeaker", "music"),
    "Road Damage": ("road damage", "road", "asphalt", "pavement", "cracked"),
    "Heritage Damage": ("heritage", "monument", "historical", "ancient"),
    "Heat Hazard": ("heat", "hot", "temperature"),
    "Drain Blockage": ("drain", "drainage", "sewage", "blockage", "clogged"),
}


def _compile_keyword(keyword):
    """Compile *keyword* as a whole-word pattern that tolerates simple inflections."""
    # Word boundaries stop "pit" matching inside "hospital" or "hole" inside
    # "manhole"; the optional suffix keeps "flooded", "potholes", "lights".
    return re.compile(r"\b" + re.escape(keyword) + r"(?:s|es|ed|ing)?\b")


_CATEGORY_PATTERNS = {
    category: tuple(_compile_keyword(word) for word in words)
    for category, words in _CATEGORY_KEYWORDS.items()
}

# A sentence ends at ., ! or ? only when followed by whitespace or the end of
# the text, so decimals such as "44.5" are not cut in half.
_SENTENCE_END = re.compile(r"[.!?]+(?=\s|$)")


def extract_reason(description: str) -> str:
    """
    Build the one-line reason for a classification from the complaint text.

    Args:
        description: Raw complaint description (may be empty or None).

    Returns:
        The first sentence of the description, capped at 15 words, or a fixed
        placeholder when the text is too short to quote.
    """
    text = (description or "").strip()
    if len(text) < 3:
        return "Vague or empty description"

    first_sentence = _SENTENCE_END.split(text, maxsplit=1)[0].strip()
    if len(first_sentence) < 5:
        return "Short/unclear complaint"

    return " ".join(first_sentence.split()[:15])


def has_severity_keywords(description: str) -> bool:
    """
    Report whether the description contains any severity keyword.

    Matching is a case-insensitive substring test, so inflected forms such as
    "collapsed" or "children" also count.

    Args:
        description: Raw complaint description (may be empty or None).

    Returns:
        True if at least one entry of SEVERITY_KEYWORDS occurs in the text.
    """
    if not description:
        return False
    lowered = description.lower()
    return any(keyword in lowered for keyword in SEVERITY_KEYWORDS)


def classify_complaint(row: dict) -> dict:
    """
    Classify a single complaint row.

    Args:
        row: Mapping with at least "complaint_id" and "description". Values
            may be None, as csv.DictReader yields for short rows.

    Returns:
        Dict with keys complaint_id, category, priority, reason, flag.
        category is always a member of ALLOWED_CATEGORIES, priority is
        "Urgent" or "Standard", and flag is "NEEDS_REVIEW" or "".
    """
    # DictReader fills missing columns with None, so never call .strip() on
    # the raw value.
    complaint_id = row.get("complaint_id") or ""
    description = (row.get("description") or "").strip()

    category = determine_category(description)
    priority = "Urgent" if has_severity_keywords(description) else "Standard"

    # Low-confidence classifications are flagged for a human instead of being
    # reported as certain.
    confidence = calculate_confidence(description, category)
    flag = "NEEDS_REVIEW" if confidence < 0.6 else ""

    return {
        "complaint_id": complaint_id,
        "category": category,
        "priority": priority,
        "reason": extract_reason(description),
        "flag": flag,
    }


def determine_category(description: str) -> str:
    """
    Pick the category whose keywords occur most often in the description.

    Keywords are matched as whole words (with simple inflections), not as
    substrings. On a tie the category declared first in the keyword table wins.

    Args:
        description: Raw complaint description (may be empty or None).

    Returns:
        A member of ALLOWED_CATEGORIES; "Other" when nothing matches.
    """
    if not description:
        return "Other"

    text = description.lower()
    best_category = "Other"
    best_score = 0
    for category, patterns in _CATEGORY_PATTERNS.items():
        score = sum(1 for pattern in patterns if pattern.search(text))
        if score > best_score:
            best_score = score
            best_category = category

    # Explicit check rather than assert: asserts are stripped under -O.
    if best_category not in ALLOWED_CATEGORIES:
        return "Other"
    return best_category


def calculate_confidence(description: str, category: str) -> float:
    """
    Estimate how trustworthy a classification is.

    Args:
        description: Raw complaint description (may be empty or None).
        category: Category chosen for the description.

    Returns:
        A score between 0.0 and 1.0. Values below 0.6 make the caller set the
        NEEDS_REVIEW flag.
    """
    length = len(description.strip()) if description else 0
    if length < 5:
        return 0.3  # effectively empty
    if length < 10:
        return 0.5  # too short to judge
    if category == "Other":
        return 0.4  # nothing matched
    return 0.8


def batch_classify(input_path: str, output_path: str):
    """
    Read the input CSV, classify each row, and write the results CSV.

    Rows missing complaint_id or description, and rows whose classification
    raises, are reported on stderr with their line number and skipped; the
    remaining rows are still processed. The output file is always written,
    with a header, even when no row could be classified.

    Args:
        input_path: Path of the CSV to read. A UTF-8 byte-order mark is
            tolerated.
        output_path: Path of the CSV to write.

    Returns:
        output_path.

    Raises:
        FileNotFoundError: If input_path does not exist.
        Exception: Any other I/O failure is logged to stderr and re-raised.
    """
    fieldnames = ["complaint_id", "category", "priority", "reason", "flag"]
    rows_processed = 0
    rows_skipped = 0
    results = []

    try:
        # utf-8-sig strips a leading BOM, which would otherwise corrupt the
        # first header name and cause every row to be skipped.
        with open(input_path, "r", newline="", encoding="utf-8-sig") as infile:
            reader = csv.DictReader(infile)

            for row_num, row in enumerate(reader, start=2):  # header is line 1
                if not row.get("complaint_id") or not row.get("description"):
                    print(f"Warning: Row {row_num} missing complaint_id or description. Skipping.", file=sys.stderr)
                    rows_skipped += 1
                    continue

                try:
                    results.append(classify_complaint(row))
                except Exception as e:  # one bad row must not stop the batch
                    print(f"Error processing row {row_num}: {e}", file=sys.stderr)
                    rows_skipped += 1
                else:
                    rows_processed += 1

        # Always write the file so callers never read a stale or missing one.
        with open(output_path, "w", newline="", encoding="utf-8") as outfile:
            writer = csv.DictWriter(outfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(results)

        print(f"Processed: {rows_processed}, Skipped: {rows_skipped}")
        return output_path

    except FileNotFoundError:
        print(f"Error: Input file not found: {input_path}", file=sys.stderr)
        raise
    except Exception as e:
        print(f"Error during batch classification: {e}", file=sys.stderr)
        raise


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="UC-0A Complaint Classifier")
    parser.add_argument("--input", required=True, help="Path to input CSV (test_[city].csv)")
    parser.add_argument("--output", required=True, help="Path to output CSV (results_[city].csv)")
    args = parser.parse_args()
    batch_classify(args.input, args.output)
    print(f"Done. Results written to {args.output}")
Results written to {args.output}") diff --git a/uc-0a/results_ahmedabad.csv b/uc-0a/results_ahmedabad.csv new file mode 100644 index 0000000..5b36ad2 --- /dev/null +++ b/uc-0a/results_ahmedabad.csv @@ -0,0 +1,16 @@ +complaint_id,category,priority,reason,flag +AM-202401,Other,Standard,Tarmac surface melting at 44°C,NEEDS_REVIEW +AM-202402,Heat Hazard,Standard,Metal bus shelter reaching dangerous temperatures, +AM-202405,Other,Standard,Dead trees with split branches,NEEDS_REVIEW +AM-202406,Heat Hazard,Standard,Irrigation system broken, +AM-202407,Other,Urgent,Broken bench and upturned paving,NEEDS_REVIEW +AM-202410,Pothole,Standard,Pothole on main highway causing morning rush lane closure, +AM-202414,Other,Standard,Residential colony unlit after 9pm,NEEDS_REVIEW +AM-202417,Waste,Standard,Night market waste not cleared before morning, +AM-202421,Noise,Standard,Club music audible at residential buildings at 2am, +AM-202424,Road Damage,Standard,Zoo approach road surface bubbling at 45°C, +AM-202429,Heat Hazard,Standard,River walk surface temperature unbearable, +AM-202431,Heritage Damage,Standard,Old city road subsidence near ancient step well, +AM-202435,Road Damage,Standard,Black metal road dividers storing heat, +AM-202444,Waste,Standard,Restaurant waste bins overflowing on Sunday night, +AM-202445,Other,Standard,BRT shelter roof glass broken,NEEDS_REVIEW diff --git a/uc-0a/results_hyderabad.csv b/uc-0a/results_hyderabad.csv new file mode 100644 index 0000000..643d9f8 --- /dev/null +++ b/uc-0a/results_hyderabad.csv @@ -0,0 +1,16 @@ +complaint_id,category,priority,reason,flag +GH-202401,Flooding,Urgent,Underpass flooded after 1hr rain, +GH-202402,Flooding,Standard,Market area flooded, +GH-202406,Flooding,Standard,Main stormwater drain 100% blocked with construction debris, +GH-202407,Drain Blockage,Standard,Drain blocked and mosquito breeding, +GH-202410,Pothole,Standard,Potholes causing vehicles to slow to 20kmph on fast road, +GH-202411,Pothole,Urgent,Pothole 
swallowed entire motorcycle wheel, +GH-202412,Pothole,Urgent,School bus struggling to navigate 6 potholes in 200m stretch, +GH-202417,Waste,Standard,Heritage zone garbage overflow, +GH-202420,Other,Standard,Construction drilling from 5am daily near residential towers,NEEDS_REVIEW +GH-202422,Pothole,Urgent,Road collapsed partially, +GH-202424,Flooding,Standard,Underpass floods in light rain, +GH-202428,Waste,Standard,Post-market waste not cleared, +GH-202432,Other,Standard,24hr supermarket delivery trucks idling with engines on,NEEDS_REVIEW +GH-202448,Flooding,Standard,Main drain blocked — entire locality at flooding risk this week, +GH-202438,Flooding,Standard,Colony surrounded by fields that channel rainwater through main road, diff --git a/uc-0a/results_kolkata.csv b/uc-0a/results_kolkata.csv new file mode 100644 index 0000000..e0b59fd --- /dev/null +++ b/uc-0a/results_kolkata.csv @@ -0,0 +1,16 @@ +complaint_id,category,priority,reason,flag +KM-202401,Streetlight,Standard,Heritage lamp post knocked over by delivery vehicle, +KM-202402,Road Damage,Standard,Historic tram road cobblestones broken up by cable laying work, +KM-202405,Other,Standard,Wedding band playing near Tagore Museum at 11pm,NEEDS_REVIEW +KM-202409,Pothole,Standard,Airport access road full of potholes, +KM-202410,Pothole,Standard,Pothole causing tyre blowouts, +KM-202411,Pothole,Standard,Deep pothole filling with rainwater, +KM-202415,Road Damage,Standard,New residential complex draining directly onto public road, +KM-202418,Waste,Standard,Tourist zone waste overflowing, +KM-202421,Pothole,Urgent,Footpath broken and sinking, +KM-202422,Road Damage,Standard,Road surface buckled near bridge, +KM-202426,Heritage Damage,Standard,Heritage residential building exterior defaced by billboard installation, +KM-202430,Road Damage,Standard,Road subsided near gas pipeline, +KM-202434,Heritage Damage,Standard,Street paving removed for utility work — heritage stone not replaced, 
+KM-202436,Streetlight,Standard,Entire colony substation tripped, +KM-202438,Heritage Damage,Standard,Street vendors using amplifiers illegally in heritage precinct, diff --git a/uc-0a/results_pune.csv b/uc-0a/results_pune.csv new file mode 100644 index 0000000..e99c889 --- /dev/null +++ b/uc-0a/results_pune.csv @@ -0,0 +1,16 @@ +complaint_id,category,priority,reason,flag +PM-202401,Pothole,Standard,Large pothole 60cm wide causing tyre damage, +PM-202402,Pothole,Urgent,Deep pothole near bus stop, +PM-202406,Flooding,Standard,Underpass flooded knee-deep after 2hrs rain, +PM-202408,Flooding,Standard,Bus stand flooded, +PM-202410,Streetlight,Standard,Three consecutive streetlights out for 10 days, +PM-202411,Streetlight,Urgent,Streetlight flickering and sparking, +PM-202413,Waste,Standard,Overflowing garbage bins near vegetable market, +PM-202418,Noise,Standard,Wedding venue playing music past midnight on weeknights, +PM-202419,Road Damage,Standard,Road surface cracked and sinking near utility work done 1 month ago, +PM-202420,Pothole,Urgent,Manhole cover missing, +PM-202427,Flooding,Standard,Bridge approach floods in 30mins of rain, +PM-202428,Other,Standard,Dead animal not removed for 36 hours,NEEDS_REVIEW +PM-202430,Streetlight,Standard,"Heritage street, lights out", +PM-202433,Waste,Standard,Bulk waste from apartment renovation dumped on public road, +PM-202446,Other,Urgent,Footpath tiles broken and upturned,NEEDS_REVIEW diff --git a/uc-0a/skills.md b/uc-0a/skills.md index 4e67823..04557be 100644 --- a/uc-0a/skills.md +++ b/uc-0a/skills.md @@ -1,15 +1,35 @@ # skills.md — UC-0A Complaint Classifier -# INSTRUCTIONS: Same as agents.md — paste README into AI, ask for skills.md YAML skills: - name: classify_complaint - description: "[FILL IN]" - input: "[FILL IN]" - output: "[FILL IN]" - error_handling: "[FILL IN]" + description: "Classify a single complaint row using RICE enforcement rules. Takes description and location, outputs category, priority, reason, and flag." 
+ input: | + { + "complaint_id": "string", + "description": "string (complaint text)", + "location": "string (optional)" + } + output: | + { + "complaint_id": "string", + "category": "string (one of Pothole|Flooding|Streetlight|Waste|Noise|Road Damage|Heritage Damage|Heat Hazard|Drain Blockage|Other)", + "priority": "string (Urgent|Standard|Low)", + "reason": "string (one sentence citing specific words from description)", + "flag": "string (NEEDS_REVIEW or empty)" + } + error_handling: "Vague/short/ambiguous descriptions → output category: Other, flag: NEEDS_REVIEW. Never fail; always produce output." - name: batch_classify - description: "[FILL IN]" - input: "[FILL IN]" - output: "[FILL IN]" - error_handling: "[FILL IN]" + description: "Read a test CSV file, classify each row using classify_complaint skill, write results to output CSV with all required fields." + input: | + { + "input_path": "string (path to test_[city].csv)", + "output_path": "string (path to results_[city].csv)" + } + output: | + { + "output_path": "string (path to written results CSV)", + "rows_processed": "integer", + "rows_skipped": "integer" + } + error_handling: "Malformed rows logged and skipped. Processing continues. No row failure should stop batch. All errors logged to console with row number and reason." diff --git a/uc-mcp/agents.md b/uc-mcp/agents.md index d2e55c8..a1abfe5 100644 --- a/uc-mcp/agents.md +++ b/uc-mcp/agents.md @@ -1,32 +1,40 @@ -# agents.md — UC-MCP MCP Server -# INSTRUCTIONS: -# 1. Open your AI tool -# 2. Paste the full contents of uc-mcp/README.md -# 3. Use this prompt: -# "Read this UC README. Using the R.I.C.E framework, generate an -# agents.md YAML with four fields: role, intent, context, enforcement. -# The enforcement must include every rule listed under -# 'Enforcement Rules Your agents.md Must Include'. -# Output only valid YAML." -# 4. Paste the output below, replacing this placeholder -# 5. 
Pay special attention to enforcement rule 1 — the tool description -# must state exact document scope +# agents.md — UC-MCP MCP Server (RICE Framework) role: > - [FILL IN: Who is this agent? What layer of the stack does it operate at? - Hint: an MCP server that exposes policy retrieval as a tool] + MCP Server exposing City Municipal Corporation policy retrieval as a + standardized tool. Operates at the tool integration layer of the AI stack. + Allows any agent (not just custom scripts) to query CMC policies through + a standard JSON-RPC interface. intent: > - [FILL IN: What does a correctly implemented MCP server produce? - Hint: JSON-RPC compliant responses, scoped tool description, correct refusals] + Respond to tools/list with clear tool scope and refuse-behavior documentation. + Respond to tools/call with grounded answers from the RAG server or a + structured refusal when out of scope. All responses are JSON-RPC 2.0 compliant. context: > - [FILL IN: What does this server have access to? - Hint: RAG server results only — no direct LLM calls, no outside knowledge] + This server has access to: + - RAG server results only (stub_rag.py or rag_server.py) + - No direct LLM calls without retrieved context + - No outside knowledge; scope strictly to CMC HR Leave, IT Acceptable Use, + and Finance Reimbursement policies enforcement: - - "[FILL IN: Tool description scope rule]" - - "[FILL IN: Refusal documentation rule]" - - "[FILL IN: inputSchema required field rule]" - - "[FILL IN: isError on failure rule]" - - "[FILL IN: HTTP 200 for all JSON-RPC responses rule]" + - rule: "Tool Description Scope Specification" + description: "Tool description must explicitly state exact document scope: CMC HR Leave Policy, IT Acceptable Use Policy, Finance Reimbursement Policy. Must also state refusal behavior: 'Returns answers grounded in retrieved document chunks with cited sources. 
Questions outside these three documents return a refusal message — this tool does not answer general knowledge questions, budget forecasts, or topics not covered by the indexed CMC policy documents.'" + keywords: "exact scope, document names, refusal note" + + - rule: "Refusal Documentation" + description: "Tool description must state what the tool cannot answer: 'This tool will not answer general knowledge questions, budget forecasts, or any topics not covered by the indexed CMC policy documents.'" + keywords: "what not to answer, limitations, scope boundaries" + + - rule: "inputSchema Required Field" + description: "inputSchema must require 'question' as a non-empty string. The field is mandatory (in required array). Help agent validate input before calling." + keywords: "question required, input validation, type string" + + - rule: "Error Responses Use isError: true" + description: "All error conditions must use isError: true in the response, never return an empty content array. Always include a text explanation of why the request failed." + keywords: "isError: true, error handling, explicit failure indication" + + - rule: "HTTP 200 for All JSON-RPC Responses" + description: "Transport errors (malformed requests, connection failures) use HTTP 4xx/5xx. Application-level errors (unknown method, invalid params) use HTTP 200 with JSON-RPC error object. Never fail with HTTP 5xx for application errors." + keywords: "HTTP 200, JSON-RPC error object, transport vs application errors" diff --git a/uc-mcp/mcp_server.py b/uc-mcp/mcp_server.py index 0400b6a..6428ed4 100644 --- a/uc-mcp/mcp_server.py +++ b/uc-mcp/mcp_server.py @@ -1,20 +1,18 @@ """ UC-MCP — mcp_server.py -Plain HTTP MCP Server — Starter File +Plain HTTP MCP Server implementing JSON-RPC 2.0 -Build this using your AI coding tool: -1. Share agents.md, skills.md, and uc-mcp/README.md with your AI tool -2. Ask it to implement this file following the MCP protocol - described in the README -3. 
Run with: python3 mcp_server.py --port 8765 -4. Test with: python3 test_client.py --port 8765 +RICE-enforced scope: +- Tool description explicitly states document scope (CMC policies) +- Tool description states refusal behavior +- All errors use isError: true +- All responses HTTP 200 -Protocol: JSON-RPC 2.0 over HTTP POST -No external dependencies beyond Python stdlib. +Stack: Python stdlib only (http.server, json) -Methods to implement: - tools/list — return the tool definition for query_policy_documents - tools/call — execute query_policy_documents, return JSON-RPC response +Usage: + python3 mcp_server.py --port 8765 + python3 test_client.py --port 8765 """ import json @@ -27,30 +25,31 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../uc-rag")) try: # Try participant's rag_server first - from rag_server import query as rag_query + from rag_server import retrieve_and_answer print("[mcp_server] Using participant rag_server.py") -except (ImportError, NotImplementedError): + rag_mode = "participant" +except (ImportError, NotImplementedError, AttributeError): # Fall back to stub from stub_rag import query as rag_query print("[mcp_server] Using stub_rag.py (fallback)") + rag_mode = "stub" # Import LLM adapter from llm_adapter import call_llm -# ── TOOL DEFINITION ────────────────────────────────────────────────────────── -# This is what the agent reads to decide when to call your tool. -# The description IS the enforcement — make it specific. +# ══════════════════════════════════════════════════════════════════════════════ +# ENFORCEMENT: Tool Description = Policy Scope + Refusal Note +# ══════════════════════════════════════════════════════════════════════════════ TOOL_DEFINITION = { "name": "query_policy_documents", "description": ( - # FILL IN: Describe exactly what this tool covers and what it does not. 
- # Bad: "Answers questions about policies" - # Good: "Answers questions about CMC HR Leave Policy, IT Acceptable Use - # Policy, and Finance Reimbursement Policy only. Returns cited - # answers grounded in retrieved document chunks. Returns a refusal - # for questions outside these three documents." - "[FILL IN: specific scope + what it refuses]" + "Answers questions about City Municipal Corporation (CMC) policy documents: " + "HR Leave Policy, IT Acceptable Use Policy, and Finance Reimbursement Policy. " + "Returns answers grounded in retrieved document chunks with cited sources. " + "Questions outside these three documents return a refusal message — " + "this tool does not answer general knowledge questions, budget forecasts, " + "or topics not covered by the indexed CMC policy documents." ), "inputSchema": { "type": "object", @@ -65,61 +64,224 @@ } -# ── SKILL: query_policy_documents ──────────────────────────────────────────── +# ══════════════════════════════════════════════════════════════════════════════ +# SKILL: query_policy_documents +# ══════════════════════════════════════════════════════════════════════════════ def query_policy_documents(question: str) -> dict: """ - Call the RAG server with the question. - Return MCP content format: {"content": [...], "isError": bool} - + ENFORCEMENT: Call RAG server and format response as MCP content. + + Returns: {"content": [{type, text}], "isError": bool} + Error handling: - - If RAG refuses (no chunks above threshold) → isError: True - - If RAG raises exception → isError: True with error message + - If RAG refuses (no chunks above threshold) → isError: true + - If RAG raises exception → isError: true with error message """ - raise NotImplementedError( - "Implement query_policy_documents using your AI tool.\n" - "Hint: call rag_query(question, llm_call=call_llm), " - "check result['refused'], format as MCP content response." 
- ) + try: + if rag_mode == "participant": + # Our rag_server.py implementation + import chromadb + from sentence_transformers import SentenceTransformer + + embedder = SentenceTransformer("all-MiniLM-L6-v2") + db_path = os.path.join(os.path.dirname(__file__), "../uc-rag/chroma_db") + client = chromadb.PersistentClient(path=db_path) + collection = client.get_or_create_collection("policies") + + result = retrieve_and_answer(question, collection, embedder) + + if result.get("is_refusal", False): + return { + "content": [{"type": "text", "text": result["answer"]}], + "isError": True + } + else: + return { + "content": [{"type": "text", "text": result["answer"]}], + "isError": False + } + else: + # stub_rag.py implementation + result = rag_query(question, llm_call=call_llm) + + if result.get("refused", False): + return { + "content": [{"type": "text", "text": result.get("answer", "Refused")}], + "isError": True + } + else: + return { + "content": [{"type": "text", "text": result.get("answer", "")}], + "isError": False + } + + except Exception as e: + return { + "content": [{"type": "text", "text": f"Error querying RAG server: {str(e)}"}], + "isError": True + } -# ── SKILL: serve_mcp ───────────────────────────────────────────────────────── +# ══════════════════════════════════════════════════════════════════════════════ +# SKILL: serve_mcp — JSON-RPC 2.0 Handler +# ══════════════════════════════════════════════════════════════════════════════ class MCPHandler(BaseHTTPRequestHandler): """ - HTTP request handler implementing JSON-RPC 2.0. - Handles POST requests to / with JSON-RPC body. - - Implement: + HTTP request handler implementing JSON-RPC 2.0 over POST. 
+ + Methods: - tools/list → return TOOL_DEFINITION - - tools/call → call query_policy_documents, return result - - unknown methods → JSON-RPC error -32601 + - tools/call → invoke query_policy_documents + - unknown → JSON-RPC error -32601 + + ENFORCEMENT: All responses HTTP 200, errors in JSON-RPC body """ def do_POST(self): - raise NotImplementedError( - "Implement do_POST using your AI tool.\n" - "Hint: read Content-Length, parse JSON body, " - "dispatch on method, write JSON-RPC response.\n" - "Return HTTP 200 for all JSON-RPC responses including errors." - ) + """ + Handle incoming JSON-RPC request. + ENFORCEMENT: Always return HTTP 200 for application errors. + """ + try: + # Read request body + content_length = int(self.headers.get("Content-Length", 0)) + if content_length == 0: + self._send_json_rpc_error(None, -32700, "Parse error: empty body") + return + + body = self.rfile.read(content_length).decode("utf-8") + + try: + request = json.loads(body) + except json.JSONDecodeError as e: + self._send_json_rpc_error(None, -32700, f"Parse error: {str(e)}") + return + + # Validate JSON-RPC structure + if not isinstance(request, dict): + self._send_json_rpc_error(None, -32700, "Parse error: not an object") + return + + method = request.get("method") + params = request.get("params") + req_id = request.get("id") + + if not method: + self._send_json_rpc_error(req_id, -32700, "Parse error: missing method") + return + + # Dispatch to method + if method == "tools/list": + self._handle_tools_list(req_id) + + elif method == "tools/call": + self._handle_tools_call(req_id, params) + + else: + # Unknown method + self._send_json_rpc_error(req_id, -32601, f"Method not found: {method}") + + except Exception as e: + self._send_json_rpc_error(None, -32603, f"Internal error: {str(e)}") + + def _handle_tools_list(self, req_id): + """ + Respond to tools/list with tool definition. 
+ Response format: {result: {tools: [TOOL_DEFINITION]}} + """ + response = { + "jsonrpc": "2.0", + "id": req_id, + "result": { + "tools": [TOOL_DEFINITION] + } + } + self._send_response_json(response) + + def _handle_tools_call(self, req_id, params): + """ + Respond to tools/call by invoking query_policy_documents. + Response format: {result: {content: [...], isError: bool}} + """ + if not isinstance(params, dict): + self._send_json_rpc_error(req_id, -32602, "Invalid params: not an object") + return + + tool_name = params.get("name") + arguments = params.get("arguments", {}) + + if tool_name != "query_policy_documents": + self._send_json_rpc_error(req_id, -32602, f"Unknown tool: {tool_name}") + return + + if not isinstance(arguments, dict): + self._send_json_rpc_error(req_id, -32602, "Invalid arguments: not an object") + return + + question = arguments.get("question") + if not question or not isinstance(question, str): + self._send_json_rpc_error(req_id, -32602, "Invalid arguments: missing or invalid 'question' field") + return + + # Call the skill + result = query_policy_documents(question) + + # Wrap in JSON-RPC response + response = { + "jsonrpc": "2.0", + "id": req_id, + "result": result + } + self._send_response_json(response) + + def _send_response_json(self, response_dict: dict): + """Send a JSON-RPC response with HTTP 200.""" + response_json = json.dumps(response_dict) + response_bytes = response_json.encode("utf-8") + + self.send_response(200) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", len(response_bytes)) + self.end_headers() + self.wfile.write(response_bytes) + + def _send_json_rpc_error(self, req_id, error_code: int, error_message: str): + """Send a JSON-RPC error response with HTTP 200.""" + response = { + "jsonrpc": "2.0", + "id": req_id, + "error": { + "code": error_code, + "message": error_message + } + } + self._send_response_json(response) def log_message(self, format, *args): - # Suppress default 
HTTP logging — use print for clarity - print(f"[mcp_server] {args[0]} {args[1]}") + """Suppress default HTTP logging.""" + if len(args) > 1: + print(f"[mcp_server] {args[1]}") -# ── MAIN ───────────────────────────────────────────────────────────────────── +# ══════════════════════════════════════════════════════════════════════════════ +# MAIN +# ══════════════════════════════════════════════════════════════════════════════ def main(): parser = argparse.ArgumentParser(description="UC-MCP Plain HTTP MCP Server") parser.add_argument("--port", type=int, default=8765, help="Port to listen on (default: 8765)") args = parser.parse_args() - # Verify RAG index exists - db_path = os.path.join(os.path.dirname(__file__), "../uc-rag/stub_chroma_db") - if not os.path.exists(db_path): - print("[mcp_server] WARNING: RAG index not found.") - print("[mcp_server] Run first: python3 ../uc-rag/stub_rag.py --build-index") - print("[mcp_server] Starting anyway — queries will fail until index is built.") + # Verify stub RAG index exists + stub_db_path = os.path.join(os.path.dirname(__file__), "../uc-rag/stub_chroma_db") + participant_db_path = os.path.join(os.path.dirname(__file__), "../uc-rag/chroma_db") + + if not os.path.exists(stub_db_path) and not os.path.exists(participant_db_path): + print("[mcp_server] WARNING: RAG index not found at either location:") + print(f"[mcp_server] stub: {stub_db_path}") + print(f"[mcp_server] participant: {participant_db_path}") + print("[mcp_server] Build stub index: python3 ../uc-rag/stub_rag.py --build-index") + print("[mcp_server] Or build your own: python3 ../uc-rag/rag_server.py --build-index") server = HTTPServer(("localhost", args.port), MCPHandler) print(f"[mcp_server] MCP server running on http://localhost:{args.port}") @@ -133,3 +295,4 @@ def main(): if __name__ == "__main__": main() + diff --git a/uc-mcp/skills.md b/uc-mcp/skills.md index 5028507..9517e86 100644 --- a/uc-mcp/skills.md +++ b/uc-mcp/skills.md @@ -1,24 +1,37 @@ # skills.md — 
UC-MCP MCP Server -# INSTRUCTIONS: -# 1. Open your AI tool -# 2. Paste the full contents of uc-mcp/README.md -# 3. Use this prompt: -# "Read this UC README. Generate a skills.md YAML defining the two -# skills: query_policy_documents and serve_mcp. Each skill needs: -# name, description, input, output, error_handling. -# error_handling must address the failure mode in the README. -# Output only valid YAML." -# 4. Paste the output below, replacing this placeholder skills: - name: query_policy_documents - description: "[FILL IN]" - input: "[FILL IN: question string]" - output: "[FILL IN: MCP content format — content array + isError]" - error_handling: "[FILL IN: what happens when RAG refuses or raises exception]" + description: "Call the RAG server with the user's question. Return MCP-formatted response: content array with text field plus isError boolean. Handle RAG refusals and exceptions by returning isError: true with clear error message." + input: | + { + "question": "string (user's policy question, non-empty)" + } + output: | + { + "content": [ + { + "type": "text", + "text": "string (answer or refusal message)" + } + ], + "isError": "boolean (true if refusal or exception)" + } + error_handling: "If RAG returns refused=true (below 0.6 threshold), set isError=true and include the refusal message in text field. If RAG server raises exception, set isError=true with error description. Never return empty content array." - name: serve_mcp - description: "[FILL IN]" - input: "[FILL IN: HTTP POST with JSON-RPC body]" - output: "[FILL IN: JSON-RPC 2.0 response, always HTTP 200]" - error_handling: "[FILL IN: unknown method → -32601, malformed request → -32700]" + description: "Start an HTTP server on configurable port implementing JSON-RPC 2.0 over POST. Handle tools/list (return query_policy_documents definition) and tools/call (invoke query_policy_documents with question). Return JSON-RPC compliant responses always with HTTP 200 for application errors." 
+ input: | + { + "method": "string (tools/list or tools/call)", + "params": "object (for tools/call: {name: string, arguments: {question: string}})", + "id": "integer (JSON-RPC request ID)" + } + output: | + HTTP 200 JSON-RPC 2.0 response: + { + "jsonrpc": "2.0", + "id": "integer (same as request)", + "result": "object or null" | "error": "object (if error)" + } + error_handling: "Unknown method → JSON-RPC error -32601 Method not found. Malformed request → error -32700 Parse error. Invalid params → error -32602 Invalid params. All errors returned with HTTP 200, never HTTP 5xx." diff --git a/uc-rag/agents.md b/uc-rag/agents.md index 186c909..e4a6f8f 100644 --- a/uc-rag/agents.md +++ b/uc-rag/agents.md @@ -1,31 +1,44 @@ -# agents.md — UC-RAG RAG Server -# INSTRUCTIONS: -# 1. Open your AI tool -# 2. Paste the full contents of uc-rag/README.md -# 3. Use this prompt: -# "Read this UC README. Using the R.I.C.E framework, generate an -# agents.md YAML with four fields: role, intent, context, enforcement. -# Enforcement must include every rule listed under -# 'Enforcement Rules Your agents.md Must Include'. -# Output only valid YAML." -# 4. Paste the output below, replacing this placeholder -# 5. Check every enforcement rule against the README before saving +# agents.md — UC-RAG RAG Server (RICE Framework) role: > - [FILL IN: Who is this agent? What is its operational boundary? - Hint: a retrieval-augmented policy assistant for city staff] + City Municipal Corporation Policy Assistant serving staff queries on + HR Leave Policy, IT Acceptable Use Policy, and Finance Reimbursement Policy. + Operational boundary: ONLY these three policy documents. No general knowledge. intent: > - [FILL IN: What does a correct output look like? - Hint: answer + cited chunks + refusal when not covered] + For each query, retrieve the relevant policy chunks, cite their sources, + and ground the answer exclusively in the retrieved text. 
If no chunks + meet the similarity threshold (0.6), return the standardized refusal + template. Never generate answers from general knowledge or blend + information across documents. context: > - [FILL IN: What sources may the agent use? - Hint: retrieved chunks only — no general knowledge] + The source documents are: + - data/policy-documents/policy_hr_leave.txt + - data/policy-documents/policy_it_acceptable_use.txt + - data/policy-documents/policy_finance_reimbursement.txt + + Chunks are embedded using SentenceTransformer (all-MiniLM-L6-v2) and + stored in ChromaDB. Retrieval uses cosine similarity. Only chunks + scoring above 0.6 are considered relevant. enforcement: - - "[FILL IN: Chunk size rule]" - - "[FILL IN: Citation rule]" - - "[FILL IN: Similarity threshold + refusal rule]" - - "[FILL IN: Context grounding rule]" - - "[FILL IN: Cross-document rule]" + - rule: "Chunk Size and Sentence Boundaries" + description: "Chunk size must not exceed 400 tokens. Never split mid-sentence. Use sentence-aware chunking: accumulate sentences until 400 tokens reached, then start new chunk. This prevents clause 5.2 type failures where 'requires approval from Department Head' and 'and HR Director' are in separate chunks." + keywords: "400 tokens, sentence boundary, no mid-sentence split" + + - rule: "Mandatory Citation" + description: "Every answer must cite the source document name and chunk index. Example: 'From policy_hr_leave.txt chunk 0: ...'. Never answer without citing source." + keywords: "source, chunk index, citation, document name" + + - rule: "Similarity Threshold and Refusal" + description: "If no retrieved chunk scores above similarity threshold 0.6, do NOT generate an answer. Return refusal template: 'This question is not covered in the retrieved policy documents. Retrieved chunks: [list]. 
Please contact the relevant department.'" + keywords: "0.6 threshold, refusal template, similarity score" + + - rule: "Context Grounding" + description: "Answers must use ONLY information present in the retrieved chunks. Never add context, assumptions, or qualifications from outside the retrieved set. If the query spans two documents, retrieve from each separately and do not merge results into a single blended answer." + keywords: "retrieved chunks only, no assumptions, no blending" + + - rule: "Cross-Document Separation" + description: "If a query logically spans two documents (e.g., 'leave policy and work-from-home allowance'), retrieve and answer from each document separately. Return answers with separate citations per document. Never merge chunks from different documents into one answer." + keywords: "separate retrieval, no cross-document blending, separate citations" diff --git a/uc-rag/rag_server.py b/uc-rag/rag_server.py index 3acfb1d..e9e30a7 100644 --- a/uc-rag/rag_server.py +++ b/uc-rag/rag_server.py @@ -1,97 +1,304 @@ """ UC-RAG — RAG Server -rag_server.py — Starter file - -Build this using your AI coding tool: -1. Share the contents of agents.md, skills.md, and uc-rag/README.md -2. Ask the AI to implement this file following the enforcement rules - in agents.md and the skill definitions in skills.md -3. Run with: python3 rag_server.py --build-index -4. Then: python3 rag_server.py --query "your question here" +rag_server.py — RICE-constrained RAG with sentence-aware chunking and 0.6 threshold Stack: - pip3 install sentence-transformers chromadb - LLM: set your API key in llm_adapter.py (../uc-mcp/llm_adapter.py) - or set environment variable GEMINI_API_KEY + pip3 install sentence-transformers chromadb nltk + LLM: set GEMINI_API_KEY or use llm_adapter.py + +Enforcement Rules: +1. Chunk size max 400 tokens, sentence-aware (no mid-sentence splits) +2. Mandatory citation of source document and chunk index +3. 
Similarity threshold 0.6 — return refusal template below threshold +4. Answer only from retrieved chunks — no general knowledge +5. Cross-document queries → separate retrieval per document """ import argparse import os import sys +import json +import math + +# Add uc-mcp to path for llm_adapter +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "uc-mcp")) + +try: + from sentence_transformers import SentenceTransformer + import chromadb + import nltk + nltk.download("punkt", quiet=True) + from nltk.tokenize import sent_tokenize +except ImportError as e: + print(f"ERROR: Missing dependency. Run: pip3 install sentence-transformers chromadb nltk") + print(f"Details: {e}") + sys.exit(1) + +try: + from llm_adapter import call_llm +except ImportError: + print("ERROR: Cannot find llm_adapter.py in ../uc-mcp/") + sys.exit(1) + + +# REFUSAL TEMPLATE (Enforcement: consistent refusal for out-of-scope queries) +REFUSAL_TEMPLATE = ( + "This question is not covered in the retrieved policy documents. " + "Retrieved chunks: {chunks}. " + "Please contact the relevant department for guidance." +) + + +def count_tokens(text: str) -> int: + """ + Rough token count: split on whitespace, estimate ~1.3 tokens per word. + More accurate would use tiktoken, but this is sufficient for ~400 token limit. + """ + words = text.split() + return int(len(words) * 1.3) + -# --- SKILL: chunk_documents --- def chunk_documents(docs_dir: str, max_tokens: int = 400) -> list[dict]: """ - Load all .txt files from docs_dir. - Split each into chunks of max_tokens, respecting sentence boundaries. - Return list of: {doc_name, chunk_index, text} + ENFORCEMENT: Sentence-aware chunking + Load all .txt files from docs_dir. Split each into chunks of max_tokens, + respecting sentence boundaries. Never split mid-sentence. + + Returns list of: {doc_name, chunk_index, text} + + Prevents "chunk boundary failure": clause 5.2 split across chunks. 
def chunk_documents(docs_dir: str, max_tokens: int = 400) -> list[dict]:
    """
    ENFORCEMENT: sentence-aware chunking.

    Read every ``.txt`` file in ``docs_dir`` (in sorted filename order), split
    its text into sentences with ``sent_tokenize`` and pack whole sentences
    into chunks whose estimated size (``count_tokens``) stays within
    ``max_tokens``. A sentence is never split, so a single sentence that is
    larger than ``max_tokens`` becomes a chunk on its own.

    Returns a list of ``{doc_name, chunk_index, text}`` dicts; ``chunk_index``
    restarts at 0 for each document. A missing directory yields an empty list
    and an unreadable file is skipped; both are reported on stderr.
    """
    all_chunks = []

    if not os.path.isdir(docs_dir):
        print(f"ERROR: docs_dir not found: {docs_dir}", file=sys.stderr)
        return all_chunks

    txt_names = [name for name in sorted(os.listdir(docs_dir)) if name.endswith(".txt")]

    for filename in txt_names:
        try:
            with open(os.path.join(docs_dir, filename), "r", encoding="utf-8") as handle:
                body = handle.read().strip()
        except Exception as e:
            print(f"WARNING: Cannot read {filename}: {e}", file=sys.stderr)
            continue

        pending = []        # sentences collected for the chunk being built
        pending_tokens = 0  # estimated token total of `pending`
        next_index = 0      # per-document chunk counter

        for raw_sentence in sent_tokenize(body):
            piece = raw_sentence.strip()
            if not piece:
                continue

            cost = count_tokens(piece)

            # Close the current chunk first when this sentence would overflow it.
            if pending and pending_tokens + cost > max_tokens:
                all_chunks.append({
                    "doc_name": filename,
                    "chunk_index": next_index,
                    "text": " ".join(pending)
                })
                next_index += 1
                pending, pending_tokens = [], 0

            pending.append(piece)
            pending_tokens += cost

        # Whatever is left over forms the document's final chunk.
        if pending:
            all_chunks.append({
                "doc_name": filename,
                "chunk_index": next_index,
                "text": " ".join(pending)
            })

    print(f"Loaded {len(all_chunks)} chunks from {docs_dir}", file=sys.stderr)
    return all_chunks
def build_index(docs_dir: str, db_path: str = "./chroma_db"):
    """
    Chunk all documents and store their embeddings in ChromaDB.

    Called once before querying. Any existing "policies" collection is dropped
    first so the index always mirrors the current documents. Returns None;
    progress and errors are written to stderr.
    """
    print("Chunking documents...", file=sys.stderr)
    chunks = chunk_documents(docs_dir, max_tokens=400)

    if not chunks:
        print("ERROR: No chunks loaded", file=sys.stderr)
        return

    # Load embedder
    print("Loading SentenceTransformer...", file=sys.stderr)
    embedder = SentenceTransformer("all-MiniLM-L6-v2")

    # Initialize ChromaDB
    print(f"Initializing ChromaDB at {db_path}...", file=sys.stderr)
    client = chromadb.PersistentClient(path=db_path)

    # Start from an empty collection. On a first build there is nothing to
    # delete; the exception type raised for a missing collection differs
    # between chromadb releases, so this deliberately best-effort step catches
    # Exception (but not KeyboardInterrupt/SystemExit as a bare except would).
    try:
        client.delete_collection("policies")
    except Exception:
        pass

    collection = client.get_or_create_collection("policies")

    # Embed and upsert chunks
    print(f"Embedding and storing {len(chunks)} chunks...", file=sys.stderr)
    for chunk in chunks:
        embedding = embedder.encode(chunk["text"])
        doc_id = f"{chunk['doc_name']}_chunk_{chunk['chunk_index']}"

        collection.upsert(
            ids=[doc_id],
            documents=[chunk["text"]],
            embeddings=[embedding.tolist()],
            metadatas=[{
                "doc_name": chunk["doc_name"],
                "chunk_index": chunk["chunk_index"]
            }]
        )

    print(f"Index built successfully at {db_path}", file=sys.stderr)


def retrieve_and_answer(
    query: str,
    collection,
    embedder,
    top_k: int = 3,
    threshold: float = 0.6,
    llm_call=None,
) -> dict:
    """
    ENFORCEMENT: 0.6 threshold, citation requirement, context grounding.

    Embed ``query``, fetch the ``top_k`` nearest chunks from ``collection`` and
    keep only those whose similarity is at least ``threshold``. If none
    qualify, return the refusal template (listing what was retrieved) without
    calling the LLM. Otherwise prompt the LLM with the accepted chunks only.

    Args:
        query: user question.
        collection: ChromaDB collection to search.
        embedder: model exposing ``encode(text)`` whose result has ``tolist()``.
        top_k: number of chunks to retrieve.
        threshold: minimum similarity for a chunk to be used and cited.
        llm_call: optional ``(prompt: str) -> str`` callable; defaults to the
            module's ``call_llm``.

    Returns:
        {answer, cited_chunks: [{doc_name, chunk_index, score}], is_refusal}
    """
    if llm_call is None:
        llm_call = call_llm

    results = collection.query(
        query_embeddings=[embedder.encode(query).tolist()],
        n_results=top_k
    )

    # NOTE(review): the conversion below assumes the collection uses Chroma's
    # default squared-L2 space and unit-normalised embeddings, for which
    # similarity = 1 - distance / 2 — confirm if the collection metric changes.
    retrieved = []
    if results["ids"]:
        for metadata, distance, text in zip(
            results["metadatas"][0], results["distances"][0], results["documents"][0]
        ):
            metadata = metadata or {}
            similarity = max(0.0, min(1.0, 1.0 - (distance / 2.0)))  # clamp to [0, 1]
            retrieved.append({
                "doc_name": metadata.get("doc_name", "unknown"),
                "chunk_index": metadata.get("chunk_index", 0),
                "similarity": similarity,
                "text": text,
            })

    accepted = [hit for hit in retrieved if hit["similarity"] >= threshold]

    # ENFORCEMENT: If no chunks meet threshold, return refusal (no LLM call)
    if not accepted:
        rejected = ", ".join(
            f"{hit['doc_name']} chunk {hit['chunk_index']}" for hit in retrieved
        )
        return {
            "answer": REFUSAL_TEMPLATE.format(chunks=rejected or "(none retrieved)"),
            "cited_chunks": [],
            "is_refusal": True
        }

    cited_chunks = [
        {
            "doc_name": hit["doc_name"],
            "chunk_index": hit["chunk_index"],
            "score": round(hit["similarity"], 3)
        }
        for hit in accepted
    ]

    # ENFORCEMENT: Ground answer in retrieved chunks only. Each chunk is
    # labelled with its source so the model can cite it.
    context = "\n\n".join(
        f"[{hit['doc_name']} chunk {hit['chunk_index']}]\n{hit['text']}" for hit in accepted
    )
    prompt = (
        f"Answer this question using ONLY the retrieved policy chunks below. "
        f"Do not use general knowledge. Cite the source document and chunk for your answer.\n\n"
        f"Retrieved chunks:\n{context}\n\n"
        f"Question: {query}\n\n"
        f"Answer (cite sources):"
    )

    return {
        "answer": llm_call(prompt),
        "cited_chunks": cited_chunks,
        "is_refusal": False
    }


def naive_query(query: str, docs_dir: str, llm_call=None) -> str:
    """
    Load all documents into context without retrieval.

    Run this BEFORE building RAG to observe failure modes (hallucination,
    blending). No chunking, threshold or citation enforcement is applied here
    on purpose.

    Args:
        query: user question.
        docs_dir: directory containing the ``.txt`` policy documents.
        llm_call: optional ``(prompt: str) -> str`` callable; defaults to the
            module's ``call_llm``.

    Returns:
        The LLM's answer text.
    """
    if llm_call is None:
        llm_call = call_llm

    doc_texts = {}
    for filename in sorted(os.listdir(docs_dir)):
        if not filename.endswith(".txt"):
            continue

        try:
            with open(os.path.join(docs_dir, filename), "r", encoding="utf-8") as f:
                doc_texts[filename] = f.read().strip()
        except Exception as e:
            print(f"WARNING: Cannot read {filename}: {e}", file=sys.stderr)

    # Concatenate all documents
    all_docs = "\n\n".join(f"[{name}]\n{text}" for name, text in doc_texts.items())

    prompt = (
        f"You are a city policy assistant. Answer this question based on the "
        f"policy documents below:\n\n"
        f"{all_docs}\n\n"
        f"Question: {query}\n"
        f"Answer:"
    )

    return llm_call(prompt)
+ doc_texts = {} + + for filename in sorted(os.listdir(docs_dir)): + if not filename.endswith(".txt"): + continue + + filepath = os.path.join(docs_dir, filename) + try: + with open(filepath, "r", encoding="utf-8") as f: + doc_texts[filename] = f.read().strip() + except Exception as e: + print(f"WARNING: Cannot read {filename}: {e}", file=sys.stderr) + + # Concatenate all documents + all_docs = "\n\n".join([f"[{name}]\n{text}" for name, text in doc_texts.items()]) + + prompt = ( + f"You are a city policy assistant. Answer this question based on the " + f"policy documents below:\n\n" + f"{all_docs}\n\n" + f"Question: {query}\n" + f"Answer:" ) + + answer = call_llm(prompt) + return answer -# --- MAIN --- def main(): parser = argparse.ArgumentParser(description="UC-RAG RAG Server") parser.add_argument("--build-index", action="store_true", @@ -113,23 +320,30 @@ def main(): sys.exit(1) if args.build_index: - print("Building index...") + print("Building index...", file=sys.stderr) build_index(args.docs_dir, args.db_path) - print("Index built. Run with --query to test.") + print("Index built. Run with --query to test.", file=sys.stderr) if args.query: if args.naive: - # Import LLM adapter from uc-mcp - sys.path.insert(0, "../uc-mcp") - from llm_adapter import call_llm - result = naive_query(args.query, args.docs_dir, call_llm) - print(f"\nNaive answer:\n{result}") + print("Running NAIVE mode (no retrieval)...", file=sys.stderr) + result = naive_query(args.query, args.docs_dir) + print(f"\n=== NAIVE ANSWER (will hallucinate/blend) ===\n{result}\n") else: - # Full RAG query - raise NotImplementedError( - "Wire up retrieve_and_answer with ChromaDB and embedder here." 
- ) + # Full RAG query with RICE enforcement + print("Loading embedder and database...", file=sys.stderr) + embedder = SentenceTransformer("all-MiniLM-L6-v2") + client = chromadb.PersistentClient(path=args.db_path) + collection = client.get_or_create_collection("policies") + + result = retrieve_and_answer(args.query, collection, embedder) + + print(f"\n=== RAG ANSWER (grounded, cited, enforced) ===\n{result['answer']}\n") + print(f"Cited chunks: {result['cited_chunks']}") + if result['is_refusal']: + print("(This is a refusal — out of scope)") if __name__ == "__main__": main() + diff --git a/uc-rag/skills.md b/uc-rag/skills.md index 167287b..316d259 100644 --- a/uc-rag/skills.md +++ b/uc-rag/skills.md @@ -1,25 +1,44 @@ # skills.md — UC-RAG RAG Server -# INSTRUCTIONS: -# 1. Open your AI tool -# 2. Paste the full contents of uc-rag/README.md -# 3. Use this prompt: -# "Read this UC README. Generate a skills.md YAML defining the two -# skills: chunk_documents and retrieve_and_answer. Each skill needs: -# name, description, input, output, error_handling. -# error_handling must address the failure modes in the README. -# Output only valid YAML." -# 4. Paste the output below, replacing this placeholder -# 5. Verify error_handling addresses all three failure modes skills: - name: chunk_documents - description: "[FILL IN]" - input: "[FILL IN: path to policy-documents directory]" - output: "[FILL IN: list of chunk dicts with doc_name, chunk_index, text]" - error_handling: "[FILL IN: what happens if a file is missing or unreadable]" + description: "Load all policy documents from data/policy-documents/, split each into chunks of maximum 400 tokens respecting sentence boundaries, return list of chunks with metadata." 
+ input: | + { + "docs_dir": "string (path to data/policy-documents/)", + "max_tokens": "integer (default 400)" + } + output: | + [ + { + "doc_name": "string (policy_hr_leave.txt)", + "chunk_index": "integer (0, 1, 2, ...)", + "text": "string (chunk text, max 400 tokens)" + } + ] + error_handling: "If file missing or unreadable, log error and skip that document. Processing continues for remaining documents. Never fail entirely; return all successfully chunked documents." - name: retrieve_and_answer - description: "[FILL IN]" - input: "[FILL IN: query string]" - output: "[FILL IN: answer string + list of cited chunks]" - error_handling: "[FILL IN: what happens when no chunk scores above 0.6]" + description: "Embed query using SentenceTransformer, retrieve top-3 chunks from ChromaDB by cosine similarity, filter out chunks below 0.6 threshold, call LLM with retrieved chunks as context only, return answer + cited sources." + input: | + { + "query": "string (user question)", + "collection": "ChromaDB collection", + "embedder": "SentenceTransformer model", + "llm_call": "callable for LLM invocation", + "top_k": "integer (default 3)", + "threshold": "float (default 0.6)" + } + output: | + { + "answer": "string (grounded answer or refusal template)", + "cited_chunks": [ + { + "doc_name": "string", + "chunk_index": "integer", + "score": "float (0.0-1.0 similarity)" + } + ], + "is_refusal": "boolean (true if refusal template returned)" + } + error_handling: "If no chunk scores above 0.6 threshold, return refusal template with list of retrieved chunk sources and is_refusal=true. Do NOT make an LLM call for out-of-scope queries. If ChromaDB connection fails, raise exception immediately."