From ca091e7ddf71f5b006092da5409a10f1ddcced7a Mon Sep 17 00:00:00 2001 From: Samuel Arogbonlo <47984109+samuelarogbonlo@users.noreply.github.com> Date: Tue, 14 Jan 2025 10:36:36 +0100 Subject: [PATCH] feat: Add realtime visualization dashboard --- .gitignore | 9 +- ai_lab_repo.py | 36 +++++- components/AgentVisualizer.jsx | 175 ++++++++++++++++++++++++++++ mlesolver.py | 136 ++++++++-------------- test-visualization.py | 201 +++++++++++++++++++++++++++++++++ visualization.py | 56 +++++++++ 6 files changed, 514 insertions(+), 99 deletions(-) create mode 100644 components/AgentVisualizer.jsx create mode 100644 test-visualization.py create mode 100644 visualization.py diff --git a/.gitignore b/.gitignore index ea6f4be..aa3d848 100644 --- a/.gitignore +++ b/.gitignore @@ -13,4 +13,11 @@ research_dir/* state_saves/* __pycache__/* Figure*.png -testrun.py \ No newline at end of file +testrun.py +# Agent logs +agent_logs/* +!agent_logs/.gitkeep +__pycache__/ +agent_logs/ +research_dir/ +*.pyc \ No newline at end of file diff --git a/ai_lab_repo.py b/ai_lab_repo.py index dbe9541..782550e 100755 --- a/ai_lab_repo.py +++ b/ai_lab_repo.py @@ -139,6 +139,7 @@ def perform_research(self): Loop through all research phases @return: None """ + create_visualization_interface() for phase, subtasks in self.phases: phase_start_time = time.time() # Start timing the phase if self.verbose: print(f"{'*'*50}\nBeginning phase: {phase}\n{'*'*50}") @@ -198,6 +199,8 @@ def perform_research(self): phase_duration = phase_end_time - phase_start_time print(f"Subtask '{subtask}' completed in {phase_duration:.2f} seconds.") self.statistics_per_phase[subtask]["time"] = phase_duration + if os.path.exists("agent_logs/mle_solver_latest.json"): + print(f"\nVisualization available at research_dir/visualization.html\n") def report_refinement(self): """ @@ -526,6 +529,33 @@ def human_in_loop(self, phase, phase_prod): else: print("Invalid response, type Y or N") return False +def 
create_visualization_interface(): + """Create a simple React interface for visualizations""" + visualization_html = """ + + +
+
+ {logData[selectedStep].code_lines.join('\n')}
+
+ + {logData[selectedStep].model_response} +
+\n```\n"
- "Do not forget the opening ```python and the closing ```."
- )
- model_resp = query_model(
- openai_api_key=openai_api_key,
- model_str=f"{REPAIR_LLM}",
- system_prompt=repair_sys,
- prompt=f"Provided here is the error: {error}\n\nProvided below is the code:\n\n{code}", temp=0.8)
- return extract_prompt(model_resp, "python")
- elif ctype == "edit":
- repair_sys = (
- "You are an automated code repair tool.\n"
- "Your goal is to take in code and an error and repair the code to make sure the same error does not repeat itself, and also to remove any other potential errors from the code without affecting the code output.\n"
- "Your output should match the original code as closely as possible.\n"
-
- "============= CODE EDITING TOOL =============\n"
- "You have access to a code editing tool. \n"
- "This tool allows you to replace lines indexed n through m (n:m) of the current code with as many lines of new code as you want to add. This removal is inclusive meaning that line n and m and everything between n and m is removed. This will be the primary way that you interact with code. \n"
- "You can edit code using the following command: ```EDIT N M\n\n``` EDIT is the word EDIT, N is the first line index you want to replace and M the the last line index you want to replace (everything inbetween will also be removed), and will be the new code that is replacing the old code. Before changing the existing code to be your new code, your new code will be tested and if it returns an error it will not replace the existing code.\n"
- "Please use the code editing tool to fix this code."
- "Do not forget the opening ```EDIT N M and the closing ```."
- "Your output should look like the following\n\n```EDIT N M\n\n```"
- )
- model_resp = query_model(
- openai_api_key=openai_api_key,
- model_str=f"{REPAIR_LLM}",
- system_prompt=repair_sys,
- prompt=f"Provided here is the error: {error}\n\nProvided below is the code:\n\n{code}", temp=0.2)
- return model_resp
-
-
class MLESolver:
def __init__(self, dataset_code, openai_api_key=None, notes=None, max_steps=10, insights=None, plan=None, llm_str=None):
if notes is None: self.notes = []
@@ -231,14 +155,15 @@ def __init__(self, dataset_code, openai_api_key=None, notes=None, max_steps=10,
self.should_execute_code = True
self.openai_api_key = openai_api_key
+ # Initialize visualization manager
+ self.viz_manager = VisualizationManager()
+ self.logger = self.viz_manager.get_logger("mle_solver")
+
def initial_solve(self):
"""
Initialize the solver and get an initial set of code and a return
@return: None
"""
- # @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
- # @@ Initial CodeGen Commands @@
- # @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
self.best_score = None
self.commands = [Replace()]
self.model = f"{self.llm_str}"
@@ -276,6 +201,16 @@ def gen_initial_code(self):
prompt=f"{err_hist}\nYou should now use ```REPLACE to create initial code to solve the challenge. Now please enter the ```REPLACE command below:\n ", temp=1.0)
model_resp = self.clean_text(model_resp)
cmd_str, code_lines, prev_code_ret, should_execute_code, score = self.process_command(model_resp)
+
+ # Log initial solve attempt
+ self.logger.log_step({
+ 'phase': 'initial_solve',
+ 'attempt': num_attempts,
+ 'command': cmd_str,
+ 'code_lines': code_lines,
+ 'score': score
+ })
+
print(f"@@@ INIT ATTEMPT: Command Exec // Attempt {num_attempts}: ", str(cmd_str).replace("\n", " | "))
print(f"$$$ Score: {score}")
if score is not None: break
@@ -288,6 +223,7 @@ def solve(self):
top_score = None
self.prev_code_ret = None
self.should_execute_code = False
+
while True:
if len(self.commands) == 2: cmd_app_str = "You must output either the ```EDIT or ```REPLACE command immediately. "
else: cmd_app_str = ""
@@ -299,6 +235,17 @@ def solve(self):
model_resp = self.clean_text(model_resp)
self.code_lines = copy(random.choice(self.best_codes)[0])
cmd_str, code_lines, prev_code_ret, should_execute_code, score = self.process_command(model_resp)
+
+ # Log solve attempt
+ self.logger.log_step({
+ 'phase': 'solve',
+ 'attempt': num_attempts,
+ 'command': cmd_str,
+ 'code_lines': code_lines,
+ 'score': score,
+ 'model_response': model_resp
+ })
+
self.st_history.append([model_resp, prev_code_ret, code_lines, cmd_str])
if len(self.st_history) > self.st_hist_len: self.st_history.pop(0)
if score is not None:
@@ -312,7 +259,20 @@ def solve(self):
print(f"$$$ Score: {score}")
if num_attempts >= self.min_gen_trials and top_score is not None: break
num_attempts += 1
+
self.code_lines, self.prev_code_ret, self.should_execute_code, model_resp, cmd_str = best_pkg
+
+ # Log best result
+ self.logger.log_step({
+ 'phase': 'solve_complete',
+ 'final_score': top_score,
+ 'best_code_lines': self.code_lines,
+ 'total_attempts': num_attempts
+ })
+
+ # Save logs
+ self.logger.save_logs()
+
# add top scoring code that was successful to the best codes
if top_score > self.best_codes[-1][1]:
# replace the lowest scoring one
@@ -325,10 +285,6 @@ def solve(self):
return model_resp, cmd_str
def reflect_code(self):
- """
- Provide a reflection on produced behavior for next execution
- @return: (str) language model-produced reflection
- """
code_strs = ("$"*40 + "\n\n").join([self.generate_code_lines(_code[0]) + f"\nCode Return {_code[1]}" for _code in self.best_codes])
code_strs = f"Please reflect on the following sets of code: {code_strs} and come up with generalizable insights that will help you improve your performance on this benchmark."
syst = self.system_prompt(commands=False) + code_strs
@@ -358,7 +314,7 @@ def process_command(self, model_resp):
failed = True
code_err = str()
for _tries in range(GLOBAL_REPAIR_ATTEMPTS):
- success, args = cmd.parse_command(model_resp, copy(self.code_lines), self.dataset_code)
+ success, args = cmd.parse_command(model_resp,copy(self.code_lines), self.dataset_code)
if success:
cmd_return = cmd.execute_command(args)
code_err = f"Return from executing code: {cmd_return[2]}"
@@ -447,7 +403,7 @@ def system_prompt(self, commands=True):
f"The following are notes, instructions, and general tips for you: {self.notes}"
# PLAN DESCRIPTION
f"You are given a machine learning research task described, where the plan is described as follows: {self.plan}\n"
- # DATASET DESCRIPTION
+ # DATASET DESCRIPTION
f"{self.generate_dataset_descr_prompt()}"
# Create Figures
f"You should also try generating at least two figures to showcase the results, titled Figure_1.png and Figure_2.png\n"
@@ -568,8 +524,4 @@ def run_code(self):
return self.prev_code_ret
elif self.should_execute_code:
return execute_code("\n".join(self.code_lines))
- return "Changes have not yet been made to the code."
-
-
-
-
+ return "Changes have not yet been made to the code."
\ No newline at end of file
diff --git a/test-visualization.py b/test-visualization.py
new file mode 100644
index 0000000..ee4ff8d
--- /dev/null
+++ b/test-visualization.py
@@ -0,0 +1,201 @@
+import os
+import json
+import random
+from datetime import datetime
+
def generate_test_logs(num_entries=50):
    """Generate synthetic MLE-solver log entries for testing the dashboard.

    Each entry mimics the schema written by AgentLogger.log_step:
    timestamp, step index, command name, code lines, score, and the raw
    model response.

    Args:
        num_entries: Number of log entries to generate (default 50).

    Returns:
        list[dict]: One generated log entry per step, in step order.
    """
    logs = []
    start_time = datetime.now()

    for i in range(num_entries):
        log_entry = {
            # Advance the clock one second per step so timestamps are
            # strictly increasing (previously every entry reused the same
            # start time, which flattened the dashboard timeline).
            "timestamp": (start_time + timedelta(seconds=i)).isoformat(),
            "step": i,
            # Alternate commands so both render paths get exercised.
            "command": "EDIT" if i % 2 == 0 else "REPLACE",
            "code_lines": [f"print('Step {i}')"],
            # Uniform in [0.4, 0.6) to look like a plausible metric.
            "score": 0.4 + random.random() * 0.2,
            "model_response": f"Test response for step {i}",
        }
        logs.append(log_entry)

    return logs
+
def save_logs(logs, directory="agent_logs"):
    """Save log entries to a timestamped JSON file plus a stable "latest" copy.

    Writes two files under *directory*:
      * ``mle_solver_<timestamp>.json`` - archival copy for this run
      * ``mle_solver_latest.json``      - duplicate the dashboard always reads

    Args:
        logs: JSON-serializable list of log entries.
        directory: Target directory; created if missing.

    Returns:
        str: Path to the timestamped log file.
    """
    # exist_ok avoids the check-then-create race of the old exists() guard.
    os.makedirs(directory, exist_ok=True)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filepath = os.path.join(directory, f"mle_solver_{timestamp}.json")

    # Serialize once and reuse for both files.
    serialized = json.dumps(logs, indent=2)
    with open(filepath, "w") as f:
        f.write(serialized)

    # NOTE: despite the old "symlink" comment this has always been a plain
    # duplicate file (which also works where symlinks are unavailable).
    # Opening with "w" truncates, so the prior os.remove() was unnecessary.
    latest_path = os.path.join(directory, "mle_solver_latest.json")
    with open(latest_path, "w") as f:
        f.write(serialized)

    return filepath
+
def create_visualization_html(log_file_path):
    """Write a self-contained dashboard page to research_dir/visualization.html.

    The page fetches the agent log JSON at runtime and renders a simple
    per-step view, so it must be served over HTTP (see main() for the
    ``python -m http.server`` instructions).

    Args:
        log_file_path: Path (relative to the repo root) of the log JSON the
            page should load. Previously this parameter was accepted but
            ignored and the emitted markup was effectively empty.

    Returns:
        str: Path of the generated HTML file.
    """
    os.makedirs("research_dir", exist_ok=True)

    # The page lives inside research_dir/, so reference the log file one
    # level up, normalizing Windows separators for the URL.
    log_url = log_file_path.replace(os.sep, "/")
    html_content = f"""<!DOCTYPE html>
<html>
<head>
  <meta charset="utf-8">
  <title>Agent Lab Visualization</title>
</head>
<body>
  <h1>Agent Lab Visualization</h1>
  <div id="root">Loading logs...</div>
  <script>
    fetch("../{log_url}")
      .then(r => r.json())
      .then(logs => {{
        const root = document.getElementById("root");
        root.innerHTML = logs.map(e =>
          `<div><strong>Step ${{e.step}}</strong> [${{e.command}}] ` +
          `score=${{e.score.toFixed(3)}}<pre>${{e.code_lines.join("\\n")}}</pre></div>`
        ).join("");
      }})
      .catch(err => {{
        document.getElementById("root").textContent = "Failed to load logs: " + err;
      }});
  </script>
</body>
</html>
"""

    output_path = os.path.join("research_dir", "visualization.html")
    with open(output_path, "w") as f:
        f.write(html_content)

    return output_path
+
def main():
    """Run an end-to-end smoke test of the visualization pipeline."""
    print("Starting visualization test...")

    # Produce fake solver logs and persist them where the dashboard expects.
    entries = generate_test_logs()
    log_path = save_logs(entries)
    print(f"Generated test logs at: {log_path}")

    # Emit the HTML page that renders those logs.
    page_path = create_visualization_html(log_path)
    print(f"Created visualization interface at: {page_path}")

    # Quick summary so the tester can eyeball the generated data.
    print(f"Log file contains {len(entries)} entries")
    print("Sample log entry:")
    print(json.dumps(entries[0], indent=2))

    print("\nVisualization test complete!")
    print("\nTo view the visualization:")
    print("1. Start a local server: python -m http.server")
    print("2. Open http://localhost:8000/research_dir/visualization.html in your browser")

if __name__ == "__main__":
    main()
\ No newline at end of file
diff --git a/visualization.py b/visualization.py
new file mode 100644
index 0000000..676b5a4
--- /dev/null
+++ b/visualization.py
@@ -0,0 +1,56 @@
+import json
+import os
+from datetime import datetime
+
class AgentLogger:
    """Accumulates per-step agent activity in memory and persists it as JSON.

    One logger corresponds to one agent and one session; the log file is
    named ``<agent_name>_<session_timestamp>.json`` under *log_dir*.
    """

    def __init__(self, agent_name, log_dir="agent_logs"):
        # Identifies this agent in filenames and in VisualizationManager.
        self.agent_name = agent_name
        self.log_dir = log_dir
        # Session id fixed at construction so every save_logs() call
        # overwrites the same session file rather than scattering files.
        self.current_session = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.logs = []

        # exist_ok avoids a check-then-create race when several loggers
        # are constructed concurrently for the same directory.
        os.makedirs(log_dir, exist_ok=True)

    def log_step(self, step_data):
        """Append one step record, stamping it with time and step index."""
        log_entry = {
            'timestamp': datetime.now().isoformat(),
            'step': len(self.logs),
            **step_data
        }
        self.logs.append(log_entry)

    def save_logs(self):
        """Write all buffered entries to the session log file.

        Returns:
            str: Path of the file written.
        """
        # os.path.join instead of manual "/" so paths are correct on Windows.
        filename = os.path.join(
            self.log_dir, f"{self.agent_name}_{self.current_session}.json"
        )
        with open(filename, 'w') as f:
            json.dump(self.logs, f, indent=2)
        return filename

    def get_logs(self):
        """Return the in-memory list of log entries."""
        return self.logs

    def clear_logs(self):
        """Discard all buffered entries without touching files on disk."""
        self.logs = []
+
class VisualizationManager:
    """Hands out one AgentLogger per agent name and aggregates their logs."""

    def __init__(self):
        # Maps agent_name -> AgentLogger; populated lazily by get_logger().
        self.loggers = {}

    def get_logger(self, agent_name):
        """Return the logger for *agent_name*, creating it on first use."""
        try:
            return self.loggers[agent_name]
        except KeyError:
            logger = AgentLogger(agent_name)
            self.loggers[agent_name] = logger
            return logger

    def get_all_logs(self):
        """Collect the in-memory logs of every registered agent."""
        collected = {}
        for name, logger in self.loggers.items():
            collected[name] = logger.get_logs()
        return collected

    def save_all_logs(self):
        """Persist every agent's logs; returns a name -> file path mapping."""
        return {name: logger.save_logs() for name, logger in self.loggers.items()}
\ No newline at end of file