huggingface · Rhushya · Apr 3, 2026 · Apr 3, 2026 · Apr 3, 2026 · Apr 3, 2026
diff --git a/.gitignore b/.gitignore
@@ -140,3 +140,13 @@ docs/source/_build/
 # Sphinx-gallery generated output
 docs/source/auto_getting_started/
 docs/source/sg_execution_times.rst
+
+# Local agent/tooling artifacts
+graphify-out/
+.agent/
+.hf_space_sync/
+.hf_space_ui_fix/
+
+# Local env evaluation/demo artifacts
+envs/email_triage_env/baseline_results.json
+envs/email_triage_env/PITCH_SCRIPT.md
diff --git a/EmailTriage_GRPO_Train.ipynb b/EmailTriage_GRPO_Train.ipynb
@@ -0,0 +1,86 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "accelerator": "GPU",
+    "colab": {
+      "gpuType": "T4",
+      "name": "EmailTriage_GRPO_Train.ipynb"
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": "# Email Triage GRPO Training\n**Runtime \u2192 Change runtime type \u2192 T4 GPU** before running anything.\n\nRun cells **one by one in order.**"
+    },
+    {
+      "cell_type": "code",
+      "metadata": {},
+      "source": "# CELL 1: Install\n# Takes ~3 min. After this finishes \u2192 Runtime \u2192 Restart session \u2192 then run from Cell 2\n!pip install -q \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\"\n!pip install -q --no-deps trl peft accelerate bitsandbytes datasets\nprint(\"Install done \u2014 NOW go to Runtime \u2192 Restart session, then run from Cell 2\")",
+      "outputs": [],
+      "execution_count": null
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": "### After Cell 1 finishes: **Runtime \u2192 Restart session**. Then run from Cell 2."
+    },
+    {
+      "cell_type": "code",
+      "metadata": {},
+      "source": "# CELL 2: Clone repo\nimport os\nif not os.path.exists('/content/OpenEnv'):\n    !git clone https://github.com/Rhushya/OpenEnv.git /content/OpenEnv\n    print('Cloned')\nelse:\n    print('Already cloned')\nos.chdir('/content/OpenEnv')",
+      "outputs": [],
+      "execution_count": null
+    },
+    {
+      "cell_type": "code",
+      "metadata": {},
+      "source": "# CELL 3: Setup paths\nimport sys\nsys.path.insert(0, '/content/OpenEnv/src')\nsys.path.insert(0, '/content/OpenEnv/envs')\nsys.path.insert(0, '/content/OpenEnv/envs/email_triage_env')\nsys.path.insert(0, '/content/OpenEnv/envs/email_triage_env/server')\nprint('Paths set')",
+      "outputs": [],
+      "execution_count": null
+    },
+    {
+      "cell_type": "code",
+      "metadata": {},
+      "source": "# CELL 4: Load model with Unsloth (4-bit, no vLLM needed)\nfrom unsloth import FastLanguageModel\n\nmodel, tokenizer = FastLanguageModel.from_pretrained(\n    model_name       = 'Qwen/Qwen2.5-1.5B',\n    max_seq_length   = 512,\n    dtype            = None,\n    load_in_4bit     = True,\n    fast_inference   = False,\n)\n\nmodel = FastLanguageModel.get_peft_model(\n    model,\n    r                          = 8,\n    target_modules             = ['q_proj', 'v_proj'],\n    lora_alpha                 = 8,\n    lora_dropout               = 0,\n    bias                       = 'none',\n    use_gradient_checkpointing = 'unsloth',\n    random_state               = 42,\n)\nprint('Model loaded with LoRA')",
+      "outputs": [],
+      "execution_count": null
+    },
+    {
+      "cell_type": "code",
+      "metadata": {},
+      "source": "# CELL 5: Reward functions\nimport re, sys, warnings\n\n# Keep the env package importable even if Cell 3 was skipped; never add duplicate entries.\nfor _path in ('/content/OpenEnv/envs/email_triage_env', '/content/OpenEnv/envs/email_triage_env/server'):\n    if _path not in sys.path:\n        sys.path.insert(0, _path)\n\nfrom server.email_triage_environment import EmailTriageEnvironment\nfrom models import EmailTriageAction\n\ndef _parse(text):\n    '''Parse a model reply into (category, priority, escalate, format_ok).\n\n    Missing tags fall back to 'other', 1 and False; priority is clamped to 1..5.\n    format_ok is True only when all three XML tags were found.\n    '''\n    cat = re.search(r'<category>(.*?)</category>', text, re.I)\n    pri = re.search(r'<priority>(\\d+)</priority>', text, re.I)\n    esc = re.search(r'<escalate>(true|false)</escalate>', text, re.I)\n    return (\n        cat.group(1).strip().lower() if cat else 'other',\n        max(1, min(5, int(pri.group(1)))) if pri else 1,\n        esc.group(1).lower() == 'true' if esc else False,\n        bool(cat and pri and esc)\n    )\n\ndef _completion_text(completion):\n    '''Return the generated text from a plain string or a chat-style message list.'''\n    if isinstance(completion, str):\n        return completion\n    if isinstance(completion, list):\n        # An empty message list carries no text; score it as an empty (malformed) reply.\n        return completion[0]['content'] if completion else ''\n    return str(completion)\n\ndef _score(prompt, completion):\n    '''Return (quality, format_reward) for one prompt/completion pair.\n\n    quality is the 0.5/0.2/0.3 weighted sum of the category, priority and\n    escalation scores reported by the environment; format_reward is 1.0 when\n    all three tags are present, else -1.0.\n    '''\n    cat, pri, esc, fmt = _parse(_completion_text(completion))\n    m = re.search(r'seed[:\\s]+(\\d+)', str(prompt), re.I)\n    seed = int(m.group(1)) if m else 0  # prompts without a seed marker use seed 0\n    try:\n        env = EmailTriageEnvironment(difficulty='easy')\n        env.reset(seed=seed)\n        obs = env.step(EmailTriageAction(category=cat, priority=pri, should_escalate=esc))\n        info = obs.info or {}\n        quality = (0.5*float(info.get('category_score', 0))\n                 + 0.2*float(info.get('priority_score', 0))\n                 + 0.3*float(info.get('escalation_score', 0)))\n    except Exception as exc:\n        # Best effort: an env failure must not abort training, but it must be visible,\n        # otherwise every quality reward is silently 0.0.\n        warnings.warn(f'Email triage scoring failed ({type(exc).__name__}: {exc}); quality reward set to 0.0')\n        quality = 0.0\n    return quality, 1.0 if fmt else -1.0\n\ndef reward_quality(prompts, completions, **kw):\n    '''Environment-based quality reward, one float per completion.'''\n    return [_score(p, c)[0] for p, c in zip(prompts, completions)]\n\ndef reward_format(prompts, completions, **kw):\n    '''Format reward (+1.0 / -1.0) from the reply text alone; the environment is not stepped.'''\n    return [1.0 if _parse(_completion_text(c))[3] else -1.0 for _, c in zip(prompts, completions)]\n\nprint('Reward functions ready')",
+      "outputs": [],
+      "execution_count": null
+    },
+    {
+      "cell_type": "code",
+      "metadata": {},
+      "source": "# CELL 6: Dataset\nfrom datasets import Dataset\n\nSYSTEM = (\n    'You are an email triage agent. Reply ONLY with these 3 XML tags:\\n'\n    '<category>CATEGORY</category>\\n'\n    '<priority>N</priority>\\n'\n    '<escalate>true|false</escalate>\\n'\n    'Valid categories: billing support spam urgent marketing other\\n'\n    'Priority 1=low 5=critical'\n)\n\nEMAILS = [\n    'Subject: Invoice overdue\\nMy invoice #{s} is 30 days unpaid. Please resolve.',\n    'Subject: Cannot login\\nLocked out of account since yesterday. seed {s}',\n    'Subject: Buy cheap meds\\nClick here for discounts ref={s}',\n    'Subject: URGENT DB breach\\nProduction database compromised RIGHT NOW seed {s}',\n    'Subject: Newsletter\\nThanks for subscribing id={s}',\n    'Subject: Refund request\\nOrder {s} arrived damaged, need refund',\n]\n\nprompts = [\n    [{'role': 'system', 'content': SYSTEM},\n     {'role': 'user',   'content': EMAILS[i % len(EMAILS)].format(s=i)}]\n    for i in range(64)\n]\ndataset = Dataset.from_dict({'prompt': prompts})\nprint(f'Dataset: {len(dataset)} prompts')",
+      "outputs": [],
+      "execution_count": null
+    },
+    {
+      "cell_type": "code",
+      "metadata": {},
+      "source": "# CELL 7: TRAIN\nfrom trl import GRPOConfig, GRPOTrainer\n\nconfig = GRPOConfig(\n    output_dir                  = '/content/email-triage-grpo',\n    max_steps                   = 50,\n    per_device_train_batch_size = 1,\n    gradient_accumulation_steps = 4,\n    num_generations             = 4,\n    max_completion_length       = 128,\n    temperature                 = 0.9,\n    learning_rate               = 5e-6,\n    logging_steps               = 1,\n    save_steps                  = 25,\n    fp16                        = True,\n    report_to                   = 'none',\n    dataloader_pin_memory       = False,\n)\n\ntrainer = GRPOTrainer(\n    model            = model,\n    processing_class = tokenizer,\n    reward_funcs     = [reward_quality, reward_format],\n    train_dataset    = dataset,\n    args             = config,\n)\n\nprint('Starting training...')\ntrainer.train()\ntrainer.save_model('/content/email-triage-grpo')\ntokenizer.save_pretrained('/content/email-triage-grpo')\nprint('DONE — model saved to /content/email-triage-grpo')",
+      "outputs": [],
+      "execution_count": null
+    },
+    {
+      "cell_type": "code",
+      "metadata": {},
+      "source": "# CELL 8: Push to HuggingFace Hub (run after training)\nimport os\nfrom getpass import getpass\n\nfrom huggingface_hub import HfApi\n\n# Never paste a token into the notebook (it would be saved and shared with the file).\n# Use the HF_TOKEN environment variable, or type it at the hidden prompt;\n# leaving the prompt blank falls back to a cached `huggingface-cli login`.\nHF_TOKEN = os.environ.get('HF_TOKEN') or getpass('Hugging Face write token (blank = cached login): ') or None\nREPO_ID  = 'Rhushya/oversight-arena-grpo'\n\napi = HfApi()\n# upload_folder requires the repo to exist; exist_ok=True keeps re-runs safe.\napi.create_repo(repo_id=REPO_ID, repo_type='model', token=HF_TOKEN, exist_ok=True)\napi.upload_folder(\n    folder_path    = '/content/email-triage-grpo',\n    repo_id        = REPO_ID,\n    repo_type      = 'model',\n    token          = HF_TOKEN,\n    commit_message = 'GRPO Email Triage 50 steps',\n)\nprint(f'Uploaded to https://huggingface.co/{REPO_ID}')",
+      "outputs": [],
+      "execution_count": null
+    }
+  ]
+}
diff --git a/envs/email_triage_env/.env.example b/envs/email_triage_env/.env.example
@@ -0,0 +1,13 @@
+# Required for inference.py
+API_BASE_URL=https://router.huggingface.co/v1
+MODEL_NAME=meta-llama/Meta-Llama-3-8B-Instruct
+# Preferred generic key variable for OpenAI-compatible providers
+API_KEY=
+
+# Backward-compatible key names (any one works)
+HF_TOKEN=hf_your_token_here
+GROQ_API_KEY=
+OPENAI_API_KEY=
+
+# Optional
+LOCAL_IMAGE_NAME=email-triage-env-openenv:latest
diff --git a/envs/email_triage_env/1.ipynb b/envs/email_triage_env/1.ipynb
@@ -0,0 +1,153 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# OpenEnv Email Triage - Final Colab T4 Notebook\n",
+    "\n",
+    "This notebook is prepared for **Google Colab Free Tier (T4 GPU)** and the repo:\n",
+    "- https://github.com/Rhushya/OpenEnv\n",
+    "\n",
+    "Key rule:\n",
+    "- Keep shell commands (`!python ...`) and Python code (`print(...)`) in separate cells."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7eca96a8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!nvidia-smi"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d5d998b5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!git clone https://github.com/Rhushya/OpenEnv.git\n",
+    "%cd OpenEnv\n",
+    "!pip install -U pip\n",
+    "!pip install \"torch>=2.3\" \"transformers>=4.46\" \"trl>=0.11.0\" \"accelerate>=0.34\" datasets huggingface_hub bitsandbytes fastmcp"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fbc5a58f",
+   "metadata": {},
+   "source": [
+    "## Smoke test (must pass)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c167e07b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!PYTHONPATH=src:envs python envs/email_triage_env/train_grpo.py --smoke"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b174df89",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"Smoke test complete. If this passed, run full training.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "92ece11d",
+   "metadata": {},
+   "source": [
+    "## Full T4 training run"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "09cf7fa5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!PYTHONPATH=src:envs python envs/email_triage_env/train_grpo.py --model Qwen/Qwen2-0.5B --max-steps 50 --dataset-size 64 --output-dir oversight-arena-grpo-t4"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"\\nTraining complete. Checkpoint saved to oversight-arena-grpo-t4/\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Push model to Hugging Face Hub"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!huggingface-cli login"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!PYTHONPATH=src:envs python envs/email_triage_env/train_grpo.py --model Qwen/Qwen2-0.5B --max-steps 50 --dataset-size 64 --output-dir oversight-arena-grpo-t4 --push-to-hub --hub-repo Rhushya/oversight-arena-grpo-t4"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Troubleshooting\n",
+    "\n",
+    "- `ModuleNotFoundError: No module named 'fastmcp'` -> rerun the install cell.\n",
+    "- `ModuleNotFoundError: No module named 'core'` -> pull the latest repo and rerun.\n",
+    "- CUDA OOM -> use `--max-steps 30 --dataset-size 32`.\n",
+    "- If you changed any installed packages, restart the runtime before rerunning."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/envs/email_triage_env/FINAL_SHOWCASE_README.md b/envs/email_triage_env/FINAL_SHOWCASE_README.md
@@ -0,0 +1,102 @@
+# Oversight Inbox Arena - Final Showcase README
+
+This file is your final handoff for presentation day.
+It has two parts:
+- what is already completed
+- what you must do next to present confidently
+
+## Project Status
+
+Current status: **demo is production-ready and the training pipeline is ready to run**
+
+You now have:
+- cleaned repository (tool/cache artifacts removed)
+- fixed environment/runtime issues from review
+- tests moved to standard `tests/` path
+- passing core env and HTTP tests
+- polished Gradio UI with cyber-style hero section for demo
+- Colab T4 training path and Hugging Face deployment guide
+
+## What Is Completed
+
+### Code and Quality Fixes
+
+- Removed non-repo artifacts and generated files
+- Fixed `train_grpo.py` shebang and cache thread safety
+- Corrected UI category choices and state access
+- Kept API app decoupled from Gradio server startup
+- Converted HTTP script into pytest-discoverable test
+
+### Validation Completed
+
+These tests were run successfully:
+
+```powershell
+$env:PYTHONPATH='src;envs'
+.venv\Scripts\python -m pytest tests/envs/test_email_triage_env.py tests/envs/test_email_triage_http.py -v --tb=short
+```
+
+Result: **all tests passed**
+
+### Demo UX Completed
+
+- Hero-style UI messaging for judges
+- Clear reward breakdown display
+- Difficulty modes + schema drift visibility
+- Cohesive "AI coordinator vs specialist disagreement" story
+
+## What You Need To Do Now (Final Steps)
+
+### 1) Train model on Colab T4
+
+Follow `README_NEXT_STEPS.md` and run:
+- one smoke run
+- one main run (`Qwen/Qwen2-0.5B`, 30-50 steps)
+- push checkpoint to Hugging Face Hub
+
+### 2) Deploy Hugging Face Space
+
+Use the ready template folder:
+- `envs/email_triage_env/hf_space_template/`
+
+Create your Space and upload those files.
+
+### 3) Collect your 3 final links
+
+Before submission/presentation, keep these ready:
+- GitHub repo URL
+- Hugging Face Model URL
+- Hugging Face Space URL
+
+### 4) Rehearse 2-3 minute demo
+
+Sequence:
+1. Problem statement
+2. Environment design (4 specialists + coordinator + drift)
+3. RL training on T4
+4. Live Space run and reward breakdown
+5. Final result + links
+
+## Presentation Script (Short)
+
+Use this structure:
+
+1. "We built a multi-agent RL email triage environment where one coordinator oversees 4 specialist agents."
+2. "The task includes policy/schema drift, so the model must adapt in real time."
+3. "We trained with GRPO on Colab T4 using 5 independent reward functions."
+4. "Here is the live Space demo showing coordinator decisions and reward components."
+5. "Here are the model and project links."
+
+## Risk Checklist Before Going Live
+
+- [ ] Space opens without errors
+- [ ] Start Queue works
+- [ ] Submit Decision works
+- [ ] One hard/adversarial run completes
+- [ ] Backup recording prepared (in case of network issue)
+- [ ] All links copied in one note
+
+## Final Verdict
+
+The technical project is complete enough to present now.
+Your remaining work is deployment execution + rehearsal, not core development.