NIEHS
diff --git a/eval-app/pyproject.toml b/eval-app/pyproject.toml
Lines changed: 4 additions & 3 deletions
diff --git a/eval-app/src/evaluator/data/toxpipe_eval_info/config/raw/providers.yaml b/eval-app/src/evaluator/data/toxpipe_eval_info/config/raw/providers.yaml
Lines changed: 64 additions & 76 deletions
diff --git a/eval-app/src/evaluator/notebooks/create_bulk_tests.ipynb b/eval-app/src/evaluator/notebooks/create_bulk_tests.ipynb
Lines changed: 22 additions & 22 deletions
@@ -23,7 +23,8 @@ dependencies = [
     "plotly[express]==6.4.0",
     "anywidget==0.9.18",
     "nbformat==5.10.4",
-    "langchain==0.3.27",
-    "langchain-core==0.3.76",
-    "langchain-openai==0.3.33",
+    "langchain>=1.2.8",
+    "langchain-core>=1.2.8",
+    "langchain-openai>=1.1.7",
+    "langchain-mcp-adapters>=0.2.1",
 ]
@@ -62,6 +62,70 @@ base-model:
       id: mistral-large-2
       label: Mistral Large 2
       func: queryLLM
+mcp:
+  providers:
+    - config:
+        temperature: 0
+      id: azure-o3
+      label: o3
+      func: queryToxPipeMCP
+    - config:
+        temperature: 0
+        reasoning_effort: high
+      id: azure-gpt-5
+      label: GPT-5 (high reasoning)
+      func: queryToxPipeMCP
+    - config:
+        temperature: 0
+        reasoning_effort: low
+      id: azure-gpt-5
+      label: GPT-5 (low reasoning)
+      func: queryToxPipeMCP
+    - config:
+        temperature: 0
+      id: azure-gpt-5-nano
+      label: GPT-5 Nano
+      func: queryToxPipeMCP
+    - config:
+        temperature: 0
+      id: azure-gpt-4o
+      label: GPT-4o
+      func: queryToxPipeMCP
+    - config:
+        temperature: 0
+      id: claude-4.5-haiku
+      label: Claude 4.5 Haiku
+      func: queryToxPipeMCP
+    - config:
+        temperature: 0
+      id: claude-4.5-sonnet
+      label: Claude 4.5 Sonnet
+      func: queryToxPipeMCP
+    - config:
+        temperature: 0
+      id: claude-3-7-sonnet
+      label: Claude 3.7 Sonnet
+      func: queryToxPipeMCP
+    - config:
+        temperature: 0
+      id: gemini-2.5-pro
+      label: Gemini 2.5 Pro
+      func: queryToxPipeMCP
+    - config:
+        temperature: 0
+      id: gemini-2.5-flash
+      label: Gemini 2.5 Flash
+      func: queryToxPipeMCP
+    - config:
+        temperature: 0
+      id: llama4-scout-17b-instruct
+      label: Llama 4 Scout 17B (Instruct)
+      func: queryToxPipeMCP
+    - config:
+        temperature: 0
+      id: mistral-large-2
+      label: Mistral Large 2
+      func: queryToxPipeMCP
 rag:
   providers:
     - config:
@@ -138,82 +202,6 @@ rag:
       id: ToxPipeRAG
       label: Toxpipe (RAG) [Mistral Large 2]
       func: queryToxPipeRAG
-mcp:
-  providers:
-    - config:
-        model: azure-o3
-        temp: 0
-      id: ToxPipeMCP
-      label: Toxpipe (MCP) [o3]
-      func: queryToxPipeMCP
-    - config:
-        model: azure-gpt-5
-        temp: 0
-        reasoning_effort: high
-      id: ToxPipeMCP
-      label: Toxpipe (MCP) [GPT-5 High Reasoning]
-      func: queryToxPipeMCP
-    - config:
-        model: azure-gpt-5
-        temp: 0
-        reasoning_effort: low
-      id: ToxPipeMCP
-      label: Toxpipe (MCP) [GPT-5 Low Reasoning]
-      func: queryToxPipeMCP
-    - config:
-        model: azure-gpt-5-nano
-        temp: 0
-      id: ToxPipeMCP
-      label: Toxpipe (MCP) [GPT-5 Nano]
-      func: queryToxPipeMCP
-    - config:
-        model: azure-gpt-4o
-        temp: 0
-      id: ToxPipeMCP
-      label: Toxpipe (MCP) [GPT-4o]
-      func: queryToxPipeMCP
-    - config:
-        model: claude-4.5-haiku
-        temp: 0
-      id: ToxPipeMCP
-      label: Toxpipe (MCP) [Claude 4.5 Haiku]
-      func: queryToxPipeMCP
-    - config:
-        model: claude-4.5-sonnet
-        temp: 0
-      id: ToxPipeMCP
-      label: Toxpipe (MCP) [Claude 4.5 Sonnet]
-      func: queryToxPipeMCP
-    - config:
-        model: claude-3-7-sonnet
-        temp: 0
-      id: ToxPipeMCP
-      label: Toxpipe (MCP) [Claude 3.7 Sonnet]
-      func: queryToxPipeMCP
-    - config:
-        model: gemini-2.5-pro
-        temp: 0
-      id: ToxPipeMCP
-      label: Toxpipe (MCP) [Gemini 2.5 Pro]
-      func: queryToxPipeMCP
-    - config:
-        model: gemini-2.5-flash
-        temp: 0
-      id: ToxPipeMCP
-      label: Toxpipe (MCP) [Gemini 2.5 Flash]
-      func: queryToxPipeMCP
-    - config:
-        model: llama4-scout-17b-instruct
-        temp: 0
-      id: ToxPipeMCP
-      label: Toxpipe (MCP) [Llama 4 Scout 17B (Instruct)]
-      func: queryToxPipeMCP
-    - config:
-        model: mistral-large-2
-        temp: 0
-      id: ToxPipeMCP
-      label: Toxpipe (MCP) [Mistral Large 2]
-      func: queryToxPipeMCP
 agentic:
   providers:
     - config:
 
@@ -2,14 +2,14 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [],
    "source": [
     "import pandas as pd\n",
     "import yaml\n",
     "import importlib\n",
-    "utils = importlib.import_module(\"eval-app.src.utils\")\n",
+    "utils = importlib.import_module(\"src.utils\")\n",
     "\n",
     "class MyDumper(yaml.Dumper):\n",
     "\n",
@@ -19,7 +19,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -44,7 +44,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -81,7 +81,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -144,7 +144,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [
     {
@@ -433,7 +433,7 @@
        "[5 rows x 22 columns]"
       ]
      },
-     "execution_count": 6,
+     "execution_count": 18,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -446,7 +446,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -462,7 +462,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [
     {
@@ -744,7 +744,7 @@
        "[5 rows x 22 columns]"
       ]
      },
-     "execution_count": 8,
+     "execution_count": 20,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -757,7 +757,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -773,7 +773,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 22,
    "metadata": {},
    "outputs": [
     {
@@ -966,7 +966,7 @@
        "[474 rows x 6 columns]"
       ]
      },
-     "execution_count": 10,
+     "execution_count": 22,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -979,7 +979,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [
     {
@@ -1016,7 +1016,7 @@
        "Index: []"
       ]
      },
-     "execution_count": 11,
+     "execution_count": 23,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1071,7 +1071,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 24,
    "metadata": {},
    "outputs": [
     {
@@ -1085,7 +1085,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 4/4 [00:00<00:00,  4.90it/s]\n"
+      "100%|██████████| 4/4 [00:02<00:00,  1.84it/s]\n"
      ]
     },
     {
@@ -1099,7 +1099,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 4/4 [00:00<00:00,  6.31it/s]\n"
+      "100%|██████████| 4/4 [00:02<00:00,  1.70it/s]\n"
      ]
     },
     {
@@ -1113,7 +1113,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 4/4 [00:00<00:00,  6.58it/s]\n"
+      "100%|██████████| 4/4 [00:02<00:00,  1.98it/s]\n"
      ]
     },
     {
@@ -1127,7 +1127,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 4/4 [00:00<00:00,  6.79it/s]\n"
+      "100%|██████████| 4/4 [00:01<00:00,  2.03it/s]\n"
      ]
     },
     {
@@ -1141,15 +1141,15 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 4/4 [00:00<00:00,  6.14it/s]\n"
+      "100%|██████████| 4/4 [00:02<00:00,  1.64it/s]\n"
      ]
     }
    ],
    "source": [
     "import yaml\n",
     "import tqdm\n",
     "import importlib\n",
-    "mdb = importlib.import_module(\"eval-app.src.evaluator.src.evaluation.db\")\n",
+    "mdb = importlib.import_module(\"src.evaluator.src.evaluation.db\")\n",
     "\n",
     "def loadYML(file_path):\n",
     "    data = None\n",