From 680becf76a72ba2ee9690be999b631ffcc414761 Mon Sep 17 00:00:00 2001 From: rchan Date: Mon, 23 Sep 2024 17:21:40 +0100 Subject: [PATCH 1/7] allow input csv and output csv in experiment run --- examples/notebooks/running_experiments.ipynb | 14 +- pyproject.toml | 5 +- src/prompto/experiment.py | 133 +++++++++++++++---- src/prompto/experiment_pipeline.py | 4 +- src/prompto/scripts/run_experiment.py | 18 ++- src/prompto/utils.py | 12 +- tests/core/test_experiment.py | 6 +- tests/core/test_experiment_process.py | 20 +-- tests/core/test_experiment_query_model.py | 28 ++-- tests/core/test_experiment_send_requests.py | 16 +-- tests/core/test_utils.py | 14 +- tests/scripts/test_run_experiment.py | 2 +- 12 files changed, 188 insertions(+), 84 deletions(-) diff --git a/examples/notebooks/running_experiments.ipynb b/examples/notebooks/running_experiments.ipynb index 6e6f9846..fac1a827 100644 --- a/examples/notebooks/running_experiments.ipynb +++ b/examples/notebooks/running_experiments.ipynb @@ -347,7 +347,7 @@ "- `creation_time`: the time the experiment file was created\n", "- `log_file`: the path to the log file for the experiment, e.g. `data_folder/output_folder/experiment_name/{creation_time}_experiment_name.log`\n", "- `input_file_path`: the path to the input JSONL file, e.g. `data_folder/input_folder/experiment_name.jsonl`\n", - "- `output_completed_file_path`: the path to the completed output JSONL file, e.g. `data_folder/output_folder/experiment_name/completed-experiment_name.jsonl`\n", + "- `output_completed_jsonl_file_path`: the path to the completed output JSONL file, e.g. `data_folder/output_folder/experiment_name/completed-experiment_name.jsonl`\n", "- `output_input_file_path`: the path to the input output JSONL file, e.g. `data_folder/output_folder/experiment_name/input-experiment_name.jsonl` (this is just for logging to know what the input to the experiment was)\n", "\n", "Essentially, when initialising an `Experiment` object, we construct all the paths that are relevant to that particular experiment such as the log file, the input file path, and the file paths for storing the final output for the experiment. 
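
A brief usage sketch of the attribute rename above (illustrative only, not part of the patch; it assumes a `data/input/experiment_name.jsonl` file exists, and the `Settings` arguments mirror those used in the updated tests):

    from prompto.experiment import Experiment
    from prompto.settings import Settings

    settings = Settings(data_folder="data", max_queries=50, max_attempts=5)
    experiment = Experiment("experiment_name.jsonl", settings=settings)

    # the completed-output and logged-input paths now carry an explicit "jsonl"
    experiment.output_completed_jsonl_file_path
    # -> data/output/experiment_name/{start_time}-completed-experiment_name.jsonl
    experiment.output_input_jsonl_file_out_path
    # -> data/output/experiment_name/{start_time}-input-experiment_name.jsonl
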
\n", @@ -510,8 +510,8 @@ "experiment.file_name: test.jsonl\n", "experiment.input_file_path: data2/input/test.jsonl\n", "experiment.output_folder: data2/output/test\n", - "experiment.output_input_file_out_path: data2/output/test/25-06-2024-19-14-47-input-test.jsonl\n", - "experiment.output_completed_file_path: data2/output/test/25-06-2024-19-14-47-completed-test.jsonl\n", + "experiment.output_input_jsonl_file_out_path: data2/output/test/25-06-2024-19-14-47-input-test.jsonl\n", + "experiment.output_completed_jsonl_file_path: data2/output/test/25-06-2024-19-14-47-completed-test.jsonl\n", "experiment.log_file: data2/output/test/25-06-2024-19-14-47-log-test.txt\n" ] } @@ -520,8 +520,12 @@ "print(f\"experiment.file_name: {experiment.file_name}\")\n", "print(f\"experiment.input_file_path: {experiment.input_file_path}\")\n", "print(f\"experiment.output_folder: {experiment.output_folder}\")\n", - "print(f\"experiment.output_input_file_out_path: {experiment.output_input_file_out_path}\")\n", - "print(f\"experiment.output_completed_file_path: {experiment.output_completed_file_path}\")\n", + "print(\n", + " f\"experiment.output_input_jsonl_file_out_path: {experiment.output_input_jsonl_file_out_path}\"\n", + ")\n", + "print(\n", + " f\"experiment.output_completed_jsonl_file_path: {experiment.output_completed_jsonl_file_path}\"\n", + ")\n", "print(f\"experiment.log_file: {experiment.log_file}\")" ] }, diff --git a/pyproject.toml b/pyproject.toml index 0a4362c5..84a34151 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ readme = "README.md" python = ">=3.11,<4.0" tqdm = "^4.66.4" python-dotenv = "^1.0.1" +pandas = "^2.2.3" black = { version = "^24.3.0", optional = true } isort = { version = "^5.13.2", optional = true } pre-commit = { version = "^3.7.0", optional = true } @@ -38,8 +39,8 @@ pillow = { version = "^10.4.0", optional = true } ollama = { version = "^0.3.1", optional = true } huggingface-hub = { version = "^0.23.4", optional = true } quart = { version = "^0.19.6", optional = true } -transformers = { version = "^4.41.2", optional = true } -torch = { version = "^2.3.1", optional = true } +transformers = { version = "^4.44.2", optional = true } +torch = { version = "^2.4.1", optional = true } accelerate = { version = "^0.31.0", optional = true } aiohttp = { version = "^3.9.5", optional = true } anthropic = { version = "^0.31.1", optional = true } diff --git a/src/prompto/experiment.py b/src/prompto/experiment.py index 1b91d146..6811b35f 100644 --- a/src/prompto/experiment.py +++ b/src/prompto/experiment.py @@ -5,6 +5,7 @@ import time from datetime import datetime +import pandas as pd from tqdm import tqdm from tqdm.asyncio import tqdm_asyncio @@ -32,7 +33,7 @@ class Experiment: Parameters ---------- file_name : str - The name of the jsonl experiment file + The name of the jsonl or csv experiment file settings : Settings Settings for the pipeline which includes the data folder locations, the maximum number of queries to send per minute, the maximum number @@ -44,8 +45,8 @@ def __init__( file_name: str, settings: Settings, ): - if not file_name.endswith(".jsonl"): - raise ValueError("Experiment file must be a jsonl file") + if not file_name.endswith(".jsonl") and not file_name.endswith(".csv"): + raise ValueError("Experiment file must be a jsonl or csv file") self.file_name: str = file_name # obtain experiment name from file name @@ -70,15 +71,7 @@ def __init__( ) # read in the experiment data - with open(self.input_file_path, "r") as f: - self._experiment_prompts: list[dict] = [ - 
dict(json.loads(line)) for line in f - ] - # sort the prompts by model_name key for the ollama api - # (for avoiding constantly switching and loading models between prompts) - self._experiment_prompts = sort_prompts_by_model_for_api( - self._experiment_prompts, api="ollama" - ) + self._experiment_prompts = self._read_input_file() # set the number of queries self.number_queries: int = len(self._experiment_prompts) @@ -95,13 +88,14 @@ def __init__( self.log_file: str = os.path.join( self.output_folder, f"{self.start_time}-log-{self.experiment_name}.txt" ) - # file path of the completed experiment file in the output experiment folder - self.output_completed_file_path: str = os.path.join( - self.output_folder, f"{self.start_time}-completed-" + self.file_name + # file path of the completed experiment jsonl file in the output experiment folder + self.output_completed_jsonl_file_path: str = os.path.join( + self.output_folder, + f"{self.start_time}-completed-{self.experiment_name}.jsonl", ) - # file path of the input file in the output experiment folder (for logging purposes) - self.output_input_file_out_path: str = os.path.join( - self.output_folder, f"{self.start_time}-input-" + self.file_name + # file path of the input jsonl file in the output experiment folder (for logging purposes) + self.output_input_jsonl_file_out_path: str = os.path.join( + self.output_folder, f"{self.start_time}-input-{self.experiment_name}.jsonl" ) # grouped experiment prompts by @@ -111,9 +105,31 @@ def __init__( # initialise the completed responses self.completed_responses: list[dict] = [] + # initialise the completed response data frame + self._completed_responses_dataframe: pd.DataFrame | None = None + def __str__(self) -> str: return self.file_name + def _read_input_file(self) -> list[dict]: + with open(self.input_file_path, "r") as f: + if self.input_file_path.endswith(".jsonl"): + experiment_prompts: list[dict] = [dict(json.loads(line)) for line in f] + elif self.input_file_path.endswith(".csv"): + experiment_prompts: list[dict] = pd.read_csv(f).to_dict( + orient="records" + ) + else: + raise ValueError("Experiment file must be a jsonl or csv file") + + # sort the prompts by model_name key for the ollama api + # (for avoiding constantly switching and loading models between prompts) + experiment_prompts = sort_prompts_by_model_for_api( + experiment_prompts, api="ollama" + ) + + return experiment_prompts + @property def experiment_prompts(self) -> list[dict]: return self._experiment_prompts @@ -122,6 +138,19 @@ def experiment_prompts(self) -> list[dict]: def experiment_prompts(self, value: list[dict]) -> None: raise AttributeError("Cannot set the experiment_prompts attribute") + @property + def completed_responses_dataframe(self) -> pd.DataFrame: + if self._completed_responses_dataframe is None: + self._completed_responses_dataframe = ( + self._obtain_completed_responses_dataframe() + ) + + return self._completed_responses_dataframe + + @completed_responses_dataframe.setter + def completed_responses_dataframe(self, value: pd.DataFrame) -> None: + raise AttributeError("Cannot set the completed_responses_dataframe attribute") + @property def grouped_experiment_prompts(self) -> dict[str, list[dict]]: # if settings.parallel is False, then we won't utilise the grouping @@ -298,14 +327,41 @@ async def process( # create the output folder for the experiment create_folder(self.output_folder) - # move the experiment file to the output folder + # if the experiment file is csv file, we create a jsonl file which will get moved + 
if self.input_file_path.endswith(".csv"): + # move the input experiment csv file to the output folder + output_input_csv_file_out_path = ( + self.output_input_jsonl_file_out_path.replace(".jsonl", ".csv") + ) + logging.info( + f"Moving {self.input_file_path} to {self.output_folder} as " + f"{output_input_csv_file_out_path}..." + ) + move_file( + source=self.input_file_path, + destination=output_input_csv_file_out_path, + ) + + # create an input experiment jsonl file for the experiment + logging.info( + f"Converting {self.input_file_path} to jsonl file for processing..." + ) + input_file_path_as_jsonl = self.input_file_path.replace(".csv", ".jsonl") + with open(input_file_path_as_jsonl, "w") as f: + for prompt_dict in self.experiment_prompts: + json.dump(prompt_dict, f) + f.write("\n") + else: + input_file_path_as_jsonl = self.input_file_path + + # move the input experiment jsonl file to the output folder logging.info( - f"Moving {self.input_file_path} to {self.output_folder} as " - f"{self.output_input_file_out_path}..." + f"Moving {input_file_path_as_jsonl} to {self.output_folder} as " + f"{self.output_input_jsonl_file_out_path}..." ) move_file( - source=self.input_file_path, - destination=self.output_input_file_out_path, + source=input_file_path_as_jsonl, + destination=self.output_input_jsonl_file_out_path, ) # run the experiment asynchronously @@ -347,7 +403,7 @@ async def process( avg_query_processing_time = processing_time / self.number_queries # read the output file - with open(self.output_completed_file_path, "r") as f: + with open(self.output_completed_jsonl_file_path, "r") as f: self.completed_responses: list[dict] = [ dict(json.loads(line)) for line in f ] @@ -633,7 +689,7 @@ async def query_model_and_record_response( # record the response in a jsonl file asynchronously using FILE_WRITE_LOCK async with FILE_WRITE_LOCK: - with open(self.output_completed_file_path, "a") as f: + with open(self.output_completed_jsonl_file_path, "a") as f: json.dump(completed_prompt_dict, f) f.write("\n") @@ -720,3 +776,30 @@ async def evaluate_responses( prompt_dict = func(prompt_dict) return prompt_dict + + def _obtain_completed_responses_dataframe(self) -> pd.DataFrame: + if self.completed_responses == []: + raise ValueError( + "No completed responses to convert to a DataFrame " + "(completed_responses attribute is empty). " + "Run the process method to obtain the completed responses" + ) + + return pd.DataFrame.from_records(self.completed_responses) + + def save_completed_responses_to_csv(self, filename: str = None) -> None: + """ + Save the completed responses to a csv file. + + Parameters + ---------- + filename : str | None + The name of the csv file to save the completed responses to. 
+ If None, the filename will be the experiment name with the + timestamp of when the experiment started to run, by default None + """ + if filename is None: + filename = self.output_completed_jsonl_file_path.replace(".jsonl", ".csv") + + logging.info(f"Saving completed responses (as csv) to {filename}...") + self.completed_responses_dataframe.to_csv(filename, index=False) diff --git a/src/prompto/experiment_pipeline.py b/src/prompto/experiment_pipeline.py index 0b09a534..1d079181 100644 --- a/src/prompto/experiment_pipeline.py +++ b/src/prompto/experiment_pipeline.py @@ -6,7 +6,7 @@ from prompto.settings import Settings from prompto.utils import ( create_folder, - sort_jsonl_files_by_creation_time, + sort_input_files_by_creation_time, write_log_message, ) @@ -77,7 +77,7 @@ def update_experiment_files(self) -> None: Function to update the list of experiment files by sorting the files by creation/change time (using `os.path.getctime`). """ - self.experiment_files = sort_jsonl_files_by_creation_time( + self.experiment_files = sort_input_files_by_creation_time( input_folder=self.settings.input_folder ) diff --git a/src/prompto/scripts/run_experiment.py b/src/prompto/scripts/run_experiment.py index 9d3ec4d0..84c2cb91 100644 --- a/src/prompto/scripts/run_experiment.py +++ b/src/prompto/scripts/run_experiment.py @@ -174,9 +174,9 @@ def parse_file_path_and_check_in_input( if not os.path.exists(file_path): raise FileNotFoundError(f"File {file_path} not found") - # check if file is a jsonl file - if not file_path.endswith(".jsonl"): - raise ValueError("Experiment file must be a jsonl file") + # check if file is a jsonl or csv file + if not file_path.endswith(".jsonl") and not file_path.endswith(".csv"): + raise ValueError("Experiment file must be a jsonl or csv file") # get experiment file name (without the path) experiment_file_name = os.path.basename(file_path) @@ -397,6 +397,12 @@ async def main(): type=str, default=None, ) + parser.add_argument( + "--output-as-csv", + help="Output the results as a csv file", + action="store_true", + default=False, + ) args = parser.parse_args() # initialise logging @@ -450,6 +456,9 @@ async def main(): logging.info(f"Starting processing experiment: {args.file}...") await experiment.process(evaluation_funcs=scoring_functions) + if args.output_as_csv: + experiment.save_completed_responses_to_csv() + # create judge experiment judge_experiment = create_judge_experiment( create_judge_file=create_judge_file, @@ -466,6 +475,9 @@ async def main(): ) await judge_experiment.process() + if args.output_as_csv: + judge_experiment.save_completed_responses_to_csv() + logging.info("Experiment processed successfully!") diff --git a/src/prompto/utils.py b/src/prompto/utils.py index 74553fd9..6b7e50e6 100644 --- a/src/prompto/utils.py +++ b/src/prompto/utils.py @@ -7,9 +7,9 @@ FILE_WRITE_LOCK = asyncio.Lock() -def sort_jsonl_files_by_creation_time(input_folder: str) -> list[str]: +def sort_input_files_by_creation_time(input_folder: str) -> list[str]: """ - Function sorts the jsonl files in the input folder by creation/change + Function sorts the jsonl or csv files in the input folder by creation/change time in a given directory. Parameters @@ -20,7 +20,7 @@ def sort_jsonl_files_by_creation_time(input_folder: str) -> list[str]: Returns ------- list[str] - Ordered list of jsonl filenames in the input folder. + Ordered list of jsonl or csv filenames in the input folder. 
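
A quick illustration of the widened filter (a sketch only; the folder contents below are hypothetical and not taken from the repository):

    from prompto.utils import sort_input_files_by_creation_time

    # suppose data/input contains, in creation order:
    #   first.jsonl, second.csv, third.jsonl
    sort_input_files_by_creation_time(input_folder="data/input")
    # previously only the .jsonl files were returned; now:
    # ['first.jsonl', 'second.csv', 'third.jsonl']
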
""" if not os.path.isdir(input_folder): raise ValueError( @@ -28,7 +28,11 @@ def sort_jsonl_files_by_creation_time(input_folder: str) -> list[str]: ) return sorted( - [f for f in os.listdir(input_folder) if f.endswith(".jsonl")], + [ + f + for f in os.listdir(input_folder) + if (f.endswith(".jsonl") or f.endswith(".csv")) + ], key=lambda f: os.path.getctime(os.path.join(input_folder, f)), ) diff --git a/tests/core/test_experiment.py b/tests/core/test_experiment.py index 66ad8e54..4604d3a6 100644 --- a/tests/core/test_experiment.py +++ b/tests/core/test_experiment.py @@ -20,7 +20,7 @@ def test_experiment_init_errors(temporary_data_folders): Experiment(settings=Settings()) # passing in a filename that is not a .jsonl file should raise a ValueError - with pytest.raises(ValueError, match="Experiment file must be a jsonl file"): + with pytest.raises(ValueError, match="Experiment file must be a jsonl or csv file"): Experiment("test.txt", settings=Settings()) # passing in a filename that is not in settings.input_folder should raise a FileNotFoundError @@ -56,11 +56,11 @@ def test_experiment_init(temporary_data_folders): assert isinstance(experiment.creation_time, str) assert isinstance(experiment.start_time, str) assert ( - experiment.output_completed_file_path + experiment.output_completed_jsonl_file_path == f"data/output/test_in_input/{experiment.start_time}-completed-test_in_input.jsonl" ) assert ( - experiment.output_input_file_out_path + experiment.output_input_jsonl_file_out_path == f"data/output/test_in_input/{experiment.start_time}-input-test_in_input.jsonl" ) assert experiment._experiment_prompts == [ diff --git a/tests/core/test_experiment_process.py b/tests/core/test_experiment_process.py index 0641b6de..90708bdb 100644 --- a/tests/core/test_experiment_process.py +++ b/tests/core/test_experiment_process.py @@ -65,8 +65,8 @@ async def test_process( assert experiment.completed_responses == result # check that the response is saved to the output file - assert os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: responses = [dict(json.loads(line)) for line in f] assert responses == result @@ -194,8 +194,8 @@ async def test_process_with_max_queries_dict( assert experiment.completed_responses == result # check that the response is saved to the output file - assert os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: responses = [dict(json.loads(line)) for line in f] assert responses == result @@ -337,8 +337,8 @@ async def test_process_with_groups( assert experiment.completed_responses == result # check that the response is saved to the output file - assert os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: responses = [dict(json.loads(line)) for line in f] assert responses == result @@ -466,8 +466,8 @@ async def test_process_with_max_queries_dict_and_groups( assert experiment.completed_responses == result # check that the response is saved to the output file - assert 
os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: responses = [dict(json.loads(line)) for line in f] assert responses == result @@ -730,8 +730,8 @@ async def test_process_with_evaluation( assert experiment.completed_responses == result # check that the response is saved to the output file - assert os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: responses = [dict(json.loads(line)) for line in f] assert responses == result diff --git a/tests/core/test_experiment_query_model.py b/tests/core/test_experiment_query_model.py index 328deab9..f39c2f68 100644 --- a/tests/core/test_experiment_query_model.py +++ b/tests/core/test_experiment_query_model.py @@ -53,8 +53,8 @@ async def test_query_model_and_record_response( assert log_msg in caplog.text # check that the response is saved to the output file - assert os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: responses = [dict(json.loads(line)) for line in f] assert len(responses) == 1 @@ -158,8 +158,8 @@ async def test_query_model_and_record_response_not_implemented_error( ) # check that the response is saved to the output file - assert os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: responses = [dict(json.loads(line)) for line in f] assert len(responses) == 1 @@ -213,8 +213,8 @@ async def test_query_model_and_record_response_key_error( assert result["response"] == "KeyError - 'some key error'" # check that the response is saved to the output file - assert os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: responses = [dict(json.loads(line)) for line in f] assert len(responses) == 1 @@ -264,8 +264,8 @@ async def test_query_model_and_record_response_value_error( assert result["response"] == "ValueError - some value error" # check that the response is saved to the output file - assert os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: responses = [dict(json.loads(line)) for line in f] assert len(responses) == 1 @@ -315,8 +315,8 @@ async def test_query_model_and_record_response_type_error( assert result["response"] == "TypeError - some type error" # check that the response is saved to the output file - assert os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as 
f: responses = [dict(json.loads(line)) for line in f] assert len(responses) == 1 @@ -366,8 +366,8 @@ async def test_query_model_and_record_response_file_not_found_error( assert result["response"] == "FileNotFoundError - some type error" # check that the response is saved to the output file - assert os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: responses = [dict(json.loads(line)) for line in f] assert len(responses) == 1 @@ -460,8 +460,8 @@ async def test_query_model_and_record_response_exception_error_max( ) # check that the response is saved to the output file - assert os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: responses = [dict(json.loads(line)) for line in f] assert len(responses) == 1 diff --git a/tests/core/test_experiment_send_requests.py b/tests/core/test_experiment_send_requests.py index 0693b0f5..6bd70df5 100644 --- a/tests/core/test_experiment_send_requests.py +++ b/tests/core/test_experiment_send_requests.py @@ -345,8 +345,8 @@ async def test_send_requests_retry( ) # check that the response is saved to the output file - assert os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: responses = [dict(json.loads(line)) for line in f] assert len(responses) == len(PROMPT_DICTS_TO_TEST) @@ -414,8 +414,8 @@ async def test_send_requests_retry_no_retries( ) # check that the response is saved to the output file - assert os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: responses = [dict(json.loads(line)) for line in f] assert len(responses) == len(PROMPT_DICTS_TO_TEST) - 1 @@ -466,8 +466,8 @@ async def test_send_requests_retry_with_group( ) # check that the response is saved to the output file - assert os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: responses = [dict(json.loads(line)) for line in f] assert len(responses) == len(PROMPT_DICTS_TO_TEST) @@ -547,8 +547,8 @@ async def test_send_requests_retry_no_retries_group( ) # check that the response is saved to the output file - assert os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: responses = [dict(json.loads(line)) for line in f] assert len(responses) == len(PROMPT_DICTS_TO_TEST) - 1 diff --git a/tests/core/test_utils.py b/tests/core/test_utils.py index 8f9f4ec8..7c554c52 100644 --- a/tests/core/test_utils.py +++ b/tests/core/test_utils.py @@ -19,29 +19,29 @@ log_success_response_query, move_file, parse_list_arg, - 
sort_jsonl_files_by_creation_time, + sort_input_files_by_creation_time, sort_prompts_by_model_for_api, write_log_message, ) -def test_sort_jsonl_files_by_creation_time(temporary_data_folders, caplog): +def test_sort_input_files_by_creation_time(temporary_data_folders, caplog): caplog.set_level(logging.INFO) # raise error if no input folder is passed with pytest.raises(TypeError, match="missing 1 required positional argument"): - sort_jsonl_files_by_creation_time() + sort_input_files_by_creation_time() # raise error if not a path with pytest.raises( ValueError, match="Input folder 'not_a_folder' must be a valid path to a folder" ): - sort_jsonl_files_by_creation_time(input_folder="not_a_folder") + sort_input_files_by_creation_time(input_folder="not_a_folder") # raise error if not a folder with pytest.raises( ValueError, match="Input folder 'test.txt' must be a valid path to a folder" ): - sort_jsonl_files_by_creation_time(input_folder="test.txt") + sort_input_files_by_creation_time(input_folder="test.txt") # sort the jsonl files in the utils folder by creation time logging.info( @@ -54,11 +54,11 @@ def test_sort_jsonl_files_by_creation_time(temporary_data_folders, caplog): if f.endswith(".jsonl") } ) - sorted_files = sort_jsonl_files_by_creation_time(input_folder="utils") + sorted_files = sort_input_files_by_creation_time(input_folder="utils") assert sorted_files == ["first.jsonl", "second.jsonl", "third.jsonl"] # sort empty folder should return empty list - empty_folder = sort_jsonl_files_by_creation_time(input_folder="data") + empty_folder = sort_input_files_by_creation_time(input_folder="data") assert empty_folder == [] diff --git a/tests/scripts/test_run_experiment.py b/tests/scripts/test_run_experiment.py index a93ff213..267348a5 100644 --- a/tests/scripts/test_run_experiment.py +++ b/tests/scripts/test_run_experiment.py @@ -151,7 +151,7 @@ def test_parse_file_path_and_check_in_input_error(temporary_data_folder_judge): parse_file_path_and_check_in_input("unknown.json", "test") # raise error if file is not jsonl file path - with pytest.raises(ValueError, match="Experiment file must be a jsonl file"): + with pytest.raises(ValueError, match="Experiment file must be a jsonl or csv file"): parse_file_path_and_check_in_input("max_queries_dict.json", "test") From 35019cd8b7210a8273044d164744c169a18190ce Mon Sep 17 00:00:00 2001 From: rchan Date: Tue, 24 Sep 2024 09:15:48 +0100 Subject: [PATCH 2/7] update running experiments notebook --- ... 
24-09-2024-09-13-56-completed-test.jsonl} | 0 .../24-09-2024-09-13-56-input-test.jsonl} | 0 .../test/24-09-2024-09-13-56-log-test.txt | 4 + .../test/25-06-2024-19-14-47-input-test.jsonl | 3 - .../test/25-06-2024-19-14-47-log-test.txt | 4 - ...24-09-2024-09-14-36-completed-test2.jsonl} | 6 +- .../24-09-2024-09-14-36-input-test2.jsonl} | 0 .../test2/24-09-2024-09-14-36-log-test2.txt | 2 + .../25-06-2024-19-15-29-input-test2.jsonl | 3 - .../test2/25-06-2024-19-15-29-log-test2.txt | 1 - examples/notebooks/running_experiments.ipynb | 155 +++++++++++++++--- 11 files changed, 138 insertions(+), 40 deletions(-) rename examples/notebooks/data2/output/test/{25-06-2024-19-14-47-completed-test.jsonl => 24-09-2024-09-13-56-completed-test.jsonl} (100%) mode change 100644 => 100755 rename examples/notebooks/data2/{input/test.jsonl => output/test/24-09-2024-09-13-56-input-test.jsonl} (100%) create mode 100755 examples/notebooks/data2/output/test/24-09-2024-09-13-56-log-test.txt delete mode 100644 examples/notebooks/data2/output/test/25-06-2024-19-14-47-input-test.jsonl delete mode 100644 examples/notebooks/data2/output/test/25-06-2024-19-14-47-log-test.txt rename examples/notebooks/data2/output/test2/{25-06-2024-19-15-29-completed-test2.jsonl => 24-09-2024-09-14-36-completed-test2.jsonl} (57%) rename examples/notebooks/data2/{input/test2.jsonl => output/test2/24-09-2024-09-14-36-input-test2.jsonl} (100%) create mode 100644 examples/notebooks/data2/output/test2/24-09-2024-09-14-36-log-test2.txt delete mode 100644 examples/notebooks/data2/output/test2/25-06-2024-19-15-29-input-test2.jsonl delete mode 100644 examples/notebooks/data2/output/test2/25-06-2024-19-15-29-log-test2.txt diff --git a/examples/notebooks/data2/output/test/25-06-2024-19-14-47-completed-test.jsonl b/examples/notebooks/data2/output/test/24-09-2024-09-13-56-completed-test.jsonl old mode 100644 new mode 100755 similarity index 100% rename from examples/notebooks/data2/output/test/25-06-2024-19-14-47-completed-test.jsonl rename to examples/notebooks/data2/output/test/24-09-2024-09-13-56-completed-test.jsonl diff --git a/examples/notebooks/data2/input/test.jsonl b/examples/notebooks/data2/output/test/24-09-2024-09-13-56-input-test.jsonl similarity index 100% rename from examples/notebooks/data2/input/test.jsonl rename to examples/notebooks/data2/output/test/24-09-2024-09-13-56-input-test.jsonl diff --git a/examples/notebooks/data2/output/test/24-09-2024-09-13-56-log-test.txt b/examples/notebooks/data2/output/test/24-09-2024-09-13-56-log-test.txt new file mode 100755 index 00000000..48657632 --- /dev/null +++ b/examples/notebooks/data2/output/test/24-09-2024-09-13-56-log-test.txt @@ -0,0 +1,4 @@ +24-09-2024, 09:14: Error (i=1, id=9): NotImplementedError - API unknown-api not recognised or implemented +24-09-2024, 09:14: Error (i=2, id=10): NotImplementedError - API unknown-api not recognised or implemented +24-09-2024, 09:14: Error (i=3, id=11): NotImplementedError - API unknown-api not recognised or implemented +24-09-2024, 09:14: Completed experiment: test.jsonl! 
Experiment processing time: 3.703 seconds, Average time per query: 1.234 seconds diff --git a/examples/notebooks/data2/output/test/25-06-2024-19-14-47-input-test.jsonl b/examples/notebooks/data2/output/test/25-06-2024-19-14-47-input-test.jsonl deleted file mode 100644 index 6d212df6..00000000 --- a/examples/notebooks/data2/output/test/25-06-2024-19-14-47-input-test.jsonl +++ /dev/null @@ -1,3 +0,0 @@ -{"id": 9, "prompt": ["Hello", "My name is Bob and I'm 6 years old", "How old am I next year?"], "api": "unknown-api", "model_name": "unknown-model-name", "parameters": {"candidate_count": 1, "max_output_tokens": 64, "temperature": 1, "top_k": 40}} -{"id": 10, "prompt": ["Can you give me a random number between 1-10?", "What is +5 of that number?", "What is half of that number?"], "api": "unknown-api", "model_name": "unknown-model-name", "parameters": {"candidate_count": 1, "max_output_tokens": 128, "temperature": 0.5, "top_k": 40}} -{"id": 11, "prompt": "How many theaters are there in London's South End?", "api": "unknown-api", "model_name": "unknown-model-name"} diff --git a/examples/notebooks/data2/output/test/25-06-2024-19-14-47-log-test.txt b/examples/notebooks/data2/output/test/25-06-2024-19-14-47-log-test.txt deleted file mode 100644 index 800e796f..00000000 --- a/examples/notebooks/data2/output/test/25-06-2024-19-14-47-log-test.txt +++ /dev/null @@ -1,4 +0,0 @@ -25-06-2024, 19:18: Error (i=1) [id=9]. NotImplementedError - API unknown-api not recognised or implemented -25-06-2024, 19:18: Error (i=2) [id=10]. NotImplementedError - API unknown-api not recognised or implemented -25-06-2024, 19:18: Error (i=3) [id=11]. NotImplementedError - API unknown-api not recognised or implemented -25-06-2024, 19:18: Completed experiment test.jsonl! Experiment processing time: 3.713 seconds, Average time per query: 1.238 seconds diff --git a/examples/notebooks/data2/output/test2/25-06-2024-19-15-29-completed-test2.jsonl b/examples/notebooks/data2/output/test2/24-09-2024-09-14-36-completed-test2.jsonl similarity index 57% rename from examples/notebooks/data2/output/test2/25-06-2024-19-15-29-completed-test2.jsonl rename to examples/notebooks/data2/output/test2/24-09-2024-09-14-36-completed-test2.jsonl index 780c6060..584035ef 100644 --- a/examples/notebooks/data2/output/test2/25-06-2024-19-15-29-completed-test2.jsonl +++ b/examples/notebooks/data2/output/test2/24-09-2024-09-14-36-completed-test2.jsonl @@ -1,3 +1,3 @@ -{"id": 9, "prompt": ["Hello", "My name is Bob and I'm 6 years old", "How old am I next year?"], "api": "test", "model_name": "test", "parameters": {"candidate_count": 1, "max_output_tokens": 64, "temperature": 1, "top_k": 40}, "response": "This is a test response"} -{"id": 10, "prompt": ["Can you give me a random number between 1-10?", "What is +5 of that number?", "What is half of that number?"], "api": "test", "model_name": "test", "parameters": {"candidate_count": 1, "max_output_tokens": 128, "temperature": 0.5, "top_k": 40}, "response": "This is a test response"} -{"id": 11, "prompt": "How many theaters are there in London's South End?", "api": "test", "model_name": "test", "response": "This is a test response"} +{"id": 9, "prompt": ["Hello", "My name is Bob and I'm 6 years old", "How old am I next year?"], "api": "test", "model_name": "test", "parameters": {"candidate_count": 1, "max_output_tokens": 64, "temperature": 1, "top_k": 40}, "timestamp_sent": "24-09-2024-09-14-39", "response": "This is a test response"} +{"id": 10, "prompt": ["Can you give me a random number between 1-10?", 
"What is +5 of that number?", "What is half of that number?"], "api": "test", "model_name": "test", "parameters": {"candidate_count": 1, "max_output_tokens": 128, "temperature": 0.5, "top_k": 40}, "timestamp_sent": "24-09-2024-09-14-40", "response": "This is a test response"} +{"id": 11, "prompt": "How many theaters are there in London's South End?", "api": "test", "model_name": "test", "timestamp_sent": "24-09-2024-09-14-41", "response": "ValueError - This is a test error which we should handle and return"} diff --git a/examples/notebooks/data2/input/test2.jsonl b/examples/notebooks/data2/output/test2/24-09-2024-09-14-36-input-test2.jsonl similarity index 100% rename from examples/notebooks/data2/input/test2.jsonl rename to examples/notebooks/data2/output/test2/24-09-2024-09-14-36-input-test2.jsonl diff --git a/examples/notebooks/data2/output/test2/24-09-2024-09-14-36-log-test2.txt b/examples/notebooks/data2/output/test2/24-09-2024-09-14-36-log-test2.txt new file mode 100644 index 00000000..bb44e5ce --- /dev/null +++ b/examples/notebooks/data2/output/test2/24-09-2024-09-14-36-log-test2.txt @@ -0,0 +1,2 @@ +24-09-2024, 09:14: Error (i=3, id=11): ValueError - This is a test error which we should handle and return +24-09-2024, 09:14: Completed experiment: test2.jsonl! Experiment processing time: 3.615 seconds, Average time per query: 1.205 seconds diff --git a/examples/notebooks/data2/output/test2/25-06-2024-19-15-29-input-test2.jsonl b/examples/notebooks/data2/output/test2/25-06-2024-19-15-29-input-test2.jsonl deleted file mode 100644 index 1233f43d..00000000 --- a/examples/notebooks/data2/output/test2/25-06-2024-19-15-29-input-test2.jsonl +++ /dev/null @@ -1,3 +0,0 @@ -{"id": 9, "prompt": ["Hello", "My name is Bob and I'm 6 years old", "How old am I next year?"], "api": "test", "model_name": "test", "parameters": {"candidate_count": 1, "max_output_tokens": 64, "temperature": 1, "top_k": 40}} -{"id": 10, "prompt": ["Can you give me a random number between 1-10?", "What is +5 of that number?", "What is half of that number?"], "api": "test", "model_name": "test", "parameters": {"candidate_count": 1, "max_output_tokens": 128, "temperature": 0.5, "top_k": 40}} -{"id": 11, "prompt": "How many theaters are there in London's South End?", "api": "test", "model_name": "test"} diff --git a/examples/notebooks/data2/output/test2/25-06-2024-19-15-29-log-test2.txt b/examples/notebooks/data2/output/test2/25-06-2024-19-15-29-log-test2.txt deleted file mode 100644 index 8ed63d80..00000000 --- a/examples/notebooks/data2/output/test2/25-06-2024-19-15-29-log-test2.txt +++ /dev/null @@ -1 +0,0 @@ -25-06-2024, 19:19: Completed experiment test2.jsonl! 
Experiment processing time: 4.613 seconds, Average time per query: 1.538 seconds diff --git a/examples/notebooks/running_experiments.ipynb b/examples/notebooks/running_experiments.ipynb index fac1a827..aa1715fd 100644 --- a/examples/notebooks/running_experiments.ipynb +++ b/examples/notebooks/running_experiments.ipynb @@ -142,7 +142,7 @@ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mWriteFolderError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[6], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43msettings\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minput_folder\u001b[49m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124munknown_folder/input\u001b[39m\u001b[38;5;124m\"\u001b[39m\n", - "File \u001b[0;32m~/Library/CloudStorage/OneDrive-TheAlanTuringInstitute/prompto/src/prompto/settings.py:216\u001b[0m, in \u001b[0;36mSettings.input_folder\u001b[0;34m(self, value)\u001b[0m\n\u001b[1;32m 214\u001b[0m \u001b[38;5;129m@input_folder\u001b[39m\u001b[38;5;241m.\u001b[39msetter\n\u001b[1;32m 215\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minput_folder\u001b[39m(\u001b[38;5;28mself\u001b[39m, value: \u001b[38;5;28mstr\u001b[39m):\n\u001b[0;32m--> 216\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m WriteFolderError(\n\u001b[1;32m 217\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot set input folder on it\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124ms own. Set the \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata_folder\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m instead\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 218\u001b[0m )\n", + "File \u001b[0;32m~/Library/CloudStorage/OneDrive-TheAlanTuringInstitute/prompto/src/prompto/settings.py:176\u001b[0m, in \u001b[0;36mSettings.input_folder\u001b[0;34m(self, value)\u001b[0m\n\u001b[1;32m 174\u001b[0m \u001b[38;5;129m@input_folder\u001b[39m\u001b[38;5;241m.\u001b[39msetter\n\u001b[1;32m 175\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minput_folder\u001b[39m(\u001b[38;5;28mself\u001b[39m, value: \u001b[38;5;28mstr\u001b[39m):\n\u001b[0;32m--> 176\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m WriteFolderError(\n\u001b[1;32m 177\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot set input folder on it\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124ms own. Set the \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata_folder\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m instead\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 178\u001b[0m )\n", "\u001b[0;31mWriteFolderError\u001b[0m: Cannot set input folder on it's own. 
Set the 'data_folder' instead" ] } @@ -164,7 +164,7 @@ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mWriteFolderError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[7], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43msettings\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moutput_folder\u001b[49m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124munknown_folder/output\u001b[39m\u001b[38;5;124m\"\u001b[39m\n", - "File \u001b[0;32m~/Library/CloudStorage/OneDrive-TheAlanTuringInstitute/prompto/src/prompto/settings.py:228\u001b[0m, in \u001b[0;36mSettings.output_folder\u001b[0;34m(self, value)\u001b[0m\n\u001b[1;32m 226\u001b[0m \u001b[38;5;129m@output_folder\u001b[39m\u001b[38;5;241m.\u001b[39msetter\n\u001b[1;32m 227\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21moutput_folder\u001b[39m(\u001b[38;5;28mself\u001b[39m, value: \u001b[38;5;28mstr\u001b[39m):\n\u001b[0;32m--> 228\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m WriteFolderError(\n\u001b[1;32m 229\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot set output folder on it\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124ms own. Set the \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata_folder\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m instead\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 230\u001b[0m )\n", + "File \u001b[0;32m~/Library/CloudStorage/OneDrive-TheAlanTuringInstitute/prompto/src/prompto/settings.py:188\u001b[0m, in \u001b[0;36mSettings.output_folder\u001b[0;34m(self, value)\u001b[0m\n\u001b[1;32m 186\u001b[0m \u001b[38;5;129m@output_folder\u001b[39m\u001b[38;5;241m.\u001b[39msetter\n\u001b[1;32m 187\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21moutput_folder\u001b[39m(\u001b[38;5;28mself\u001b[39m, value: \u001b[38;5;28mstr\u001b[39m):\n\u001b[0;32m--> 188\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m WriteFolderError(\n\u001b[1;32m 189\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot set output folder on it\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124ms own. Set the \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata_folder\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m instead\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 190\u001b[0m )\n", "\u001b[0;31mWriteFolderError\u001b[0m: Cannot set output folder on it's own. 
Set the 'data_folder' instead" ] } @@ -186,7 +186,7 @@ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mWriteFolderError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[8], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43msettings\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmedia_folder\u001b[49m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124munknown_folder/media\u001b[39m\u001b[38;5;124m\"\u001b[39m\n", - "File \u001b[0;32m~/Library/CloudStorage/OneDrive-TheAlanTuringInstitute/prompto/src/prompto/settings.py:240\u001b[0m, in \u001b[0;36mSettings.media_folder\u001b[0;34m(self, value)\u001b[0m\n\u001b[1;32m 238\u001b[0m \u001b[38;5;129m@media_folder\u001b[39m\u001b[38;5;241m.\u001b[39msetter\n\u001b[1;32m 239\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mmedia_folder\u001b[39m(\u001b[38;5;28mself\u001b[39m, value: \u001b[38;5;28mstr\u001b[39m):\n\u001b[0;32m--> 240\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m WriteFolderError(\n\u001b[1;32m 241\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot set media folder on it\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124ms own. Set the \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata_folder\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m instead\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 242\u001b[0m )\n", + "File \u001b[0;32m~/Library/CloudStorage/OneDrive-TheAlanTuringInstitute/prompto/src/prompto/settings.py:200\u001b[0m, in \u001b[0;36mSettings.media_folder\u001b[0;34m(self, value)\u001b[0m\n\u001b[1;32m 198\u001b[0m \u001b[38;5;129m@media_folder\u001b[39m\u001b[38;5;241m.\u001b[39msetter\n\u001b[1;32m 199\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mmedia_folder\u001b[39m(\u001b[38;5;28mself\u001b[39m, value: \u001b[38;5;28mstr\u001b[39m):\n\u001b[0;32m--> 200\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m WriteFolderError(\n\u001b[1;32m 201\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot set media folder on it\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124ms own. Set the \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata_folder\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m instead\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 202\u001b[0m )\n", "\u001b[0;31mWriteFolderError\u001b[0m: Cannot set media folder on it's own. 
Set the 'data_folder' instead" ] } @@ -251,8 +251,8 @@ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[11], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43msettings\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdata_folder\u001b[49m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124munknown_folder\u001b[39m\u001b[38;5;124m\"\u001b[39m\n", - "File \u001b[0;32m~/Library/CloudStorage/OneDrive-TheAlanTuringInstitute/prompto/src/prompto/settings.py:202\u001b[0m, in \u001b[0;36mSettings.data_folder\u001b[0;34m(self, value)\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[38;5;129m@data_folder\u001b[39m\u001b[38;5;241m.\u001b[39msetter\n\u001b[1;32m 200\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdata_folder\u001b[39m(\u001b[38;5;28mself\u001b[39m, value: \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m 201\u001b[0m \u001b[38;5;66;03m# check the data folder exists\u001b[39;00m\n\u001b[0;32m--> 202\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcheck_folder_exists\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 203\u001b[0m \u001b[38;5;66;03m# set the data folder\u001b[39;00m\n\u001b[1;32m 204\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_data_folder \u001b[38;5;241m=\u001b[39m value\n", - "File \u001b[0;32m~/Library/CloudStorage/OneDrive-TheAlanTuringInstitute/prompto/src/prompto/settings.py:154\u001b[0m, in \u001b[0;36mSettings.check_folder_exists\u001b[0;34m(data_folder)\u001b[0m\n\u001b[1;32m 152\u001b[0m \u001b[38;5;66;03m# check if data folder exists\u001b[39;00m\n\u001b[1;32m 153\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39misdir(data_folder):\n\u001b[0;32m--> 154\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 155\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mData folder \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdata_folder\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m must be a valid path to a folder\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 156\u001b[0m )\n\u001b[1;32m 158\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m\n", + "File \u001b[0;32m~/Library/CloudStorage/OneDrive-TheAlanTuringInstitute/prompto/src/prompto/settings.py:162\u001b[0m, in \u001b[0;36mSettings.data_folder\u001b[0;34m(self, value)\u001b[0m\n\u001b[1;32m 159\u001b[0m \u001b[38;5;129m@data_folder\u001b[39m\u001b[38;5;241m.\u001b[39msetter\n\u001b[1;32m 160\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdata_folder\u001b[39m(\u001b[38;5;28mself\u001b[39m, value: \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m 161\u001b[0m \u001b[38;5;66;03m# check the data folder exists\u001b[39;00m\n\u001b[0;32m--> 162\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcheck_folder_exists\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 163\u001b[0m \u001b[38;5;66;03m# set the data folder\u001b[39;00m\n\u001b[1;32m 164\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_data_folder \u001b[38;5;241m=\u001b[39m value\n", + "File 
\u001b[0;32m~/Library/CloudStorage/OneDrive-TheAlanTuringInstitute/prompto/src/prompto/settings.py:114\u001b[0m, in \u001b[0;36mSettings.check_folder_exists\u001b[0;34m(data_folder)\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[38;5;66;03m# check if data folder exists\u001b[39;00m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39misdir(data_folder):\n\u001b[0;32m--> 114\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 115\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mData folder \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdata_folder\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m must be a valid path to a folder\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 116\u001b[0m )\n\u001b[1;32m 118\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m\n", "\u001b[0;31mValueError\u001b[0m: Data folder 'unknown_folder' must be a valid path to a folder" ] } @@ -348,7 +348,7 @@ "- `log_file`: the path to the log file for the experiment, e.g. `data_folder/output_folder/experiment_name/{creation_time}_experiment_name.log`\n", "- `input_file_path`: the path to the input JSONL file, e.g. `data_folder/input_folder/experiment_name.jsonl`\n", "- `output_completed_jsonl_file_path`: the path to the completed output JSONL file, e.g. `data_folder/output_folder/experiment_name/completed-experiment_name.jsonl`\n", - "- `output_input_file_path`: the path to the input output JSONL file, e.g. `data_folder/output_folder/experiment_name/input-experiment_name.jsonl` (this is just for logging to know what the input to the experiment was)\n", + "- `output_input_jsonl_file_out_path`: the path to the input output JSONL file, e.g. `data_folder/output_folder/experiment_name/input-experiment_name.jsonl` (this is just for logging to know what the input to the experiment was)\n", "\n", "Essentially, when initialising an `Experiment` object, we construct all the paths that are relevant to that particular experiment such as the log file, the input file path, and the file paths for storing the final output for the experiment. 
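
A sketch of the new csv input path that this PR introduces (illustrative; the file name and contents below are hypothetical and mirror the added unit tests rather than this notebook). A csv experiment file is read with pandas into the same list of prompt dictionaries that a jsonl file would produce; during `process()`, the csv is then copied to the output folder and converted to a jsonl file before the usual pipeline runs:

    from prompto.experiment import Experiment
    from prompto.settings import Settings

    # data/input/experiment_name.csv (hypothetical contents):
    #   id,prompt,api,model_name
    #   0,test prompt 0,test,test_model
    #   1,test prompt 1,test,test_model
    experiment = Experiment("experiment_name.csv", settings=Settings(data_folder="data"))
    experiment.experiment_prompts
    # [{'id': 0, 'prompt': 'test prompt 0', 'api': 'test', 'model_name': 'test_model'},
    #  {'id': 1, 'prompt': 'test prompt 1', 'api': 'test', 'model_name': 'test_model'}]
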
\n", "\n", @@ -396,7 +396,7 @@ { "data": { "text/plain": [ - "'25-06-2024-19-14-47'" + "'09-07-2024-11-59-54'" ] }, "execution_count": 17, @@ -510,9 +510,9 @@ "experiment.file_name: test.jsonl\n", "experiment.input_file_path: data2/input/test.jsonl\n", "experiment.output_folder: data2/output/test\n", - "experiment.output_input_jsonl_file_out_path: data2/output/test/25-06-2024-19-14-47-input-test.jsonl\n", - "experiment.output_completed_jsonl_file_path: data2/output/test/25-06-2024-19-14-47-completed-test.jsonl\n", - "experiment.log_file: data2/output/test/25-06-2024-19-14-47-log-test.txt\n" + "experiment.output_input_jsonl_file_out_path: data2/output/test/24-09-2024-09-13-56-input-test.jsonl\n", + "experiment.output_completed_jsonl_file_path: data2/output/test/24-09-2024-09-13-56-completed-test.jsonl\n", + "experiment.log_file: data2/output/test/24-09-2024-09-13-56-log-test.txt\n" ] } ], @@ -591,8 +591,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Sending 3 queries (attempt 1/5): 100%|██████████| 3/3 [00:03<00:00, 1.20s/query]\n", - "Waiting for responses (attempt 1/5): 100%|██████████| 3/3 [00:00<00:00, 514.85query/s]\n" + "Sending 3 queries at 50 QPM with RI of 1.2s (attempt 1/5): 100%|██████████| 3/3 [00:03<00:00, 1.20s/query]\n", + "Waiting for responses (attempt 1/5): 100%|██████████| 3/3 [00:00<00:00, 352.44query/s]\n" ] } ], @@ -677,13 +677,112 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "If we look at the output, we can see we got errors that there were `NotImplementedErrors` as the model was not implemented. To see the models implemented, there is a dictionary of models in the `apis` module called `ASYNC_APIS` where the keys are the API names and the values are the corresponding classes." + "After running the experiment, you can also see the output as a dataframe too:" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " id prompt api \\\n", + "0 9 [Hello, My name is Bob and I'm 6 years old, Ho... unknown-api \n", + "1 10 [Can you give me a random number between 1-10?... unknown-api \n", + "2 11 How many theaters are there in London's South ... unknown-api \n", + "\n", + " model_name parameters \\\n", + "0 unknown-model-name {'candidate_count': 1, 'max_output_tokens': 64... \n", + "1 unknown-model-name {'candidate_count': 1, 'max_output_tokens': 12... \n", + "2 unknown-model-name NaN \n", + "\n", + " response \n", + "0 NotImplementedError - API unknown-api not reco... \n", + "1 NotImplementedError - API unknown-api not reco... \n", + "2 NotImplementedError - API unknown-api not reco... " + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "experiment.completed_responses_dataframe" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If we look at the output, we can see we got errors that there were `NotImplementedErrors` as the model was not implemented. To see the models implemented, there is a dictionary of models in the `apis` module called `ASYNC_APIS` where the keys are the API names and the values are the corresponding classes." + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, "outputs": [ { "data": { @@ -691,6 +790,7 @@ "{'test': prompto.apis.testing.testing_api.TestAPI,\n", " 'azure-openai': prompto.apis.azure_openai.azure_openai.AzureOpenAIAPI,\n", " 'openai': prompto.apis.openai.openai.OpenAIAPI,\n", + " 'anthropic': prompto.apis.anthropic.anthropic.AnthropicAPI,\n", " 'gemini': prompto.apis.gemini.gemini.GeminiAPI,\n", " 'vertexai': prompto.apis.vertexai.vertexai.VertexAIAPI,\n", " 'ollama': prompto.apis.ollama.ollama.OllamaAPI,\n", @@ -698,7 +798,7 @@ " 'quart': prompto.apis.quart.quart.QuartAPI}" ] }, - "execution_count": 27, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -720,7 +820,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ @@ -743,7 +843,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -788,7 +888,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -797,7 +897,7 @@ "['test2.jsonl']" ] }, - "execution_count": 30, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -808,7 +908,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 32, "metadata": {}, "outputs": [ { @@ -840,7 +940,7 @@ " 'model_name': 'test'}]" ] }, - "execution_count": 31, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -852,15 +952,15 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 33, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "Sending 3 queries (attempt 1/5): 100%|██████████| 3/3 [00:03<00:00, 1.20s/query]\n", - "Waiting for responses (attempt 1/5): 100%|██████████| 3/3 [00:01<00:00, 2.99query/s]\n" + "Sending 3 queries at 50 QPM with RI of 1.2s (attempt 1/5): 100%|██████████| 3/3 [00:03<00:00, 1.20s/query]\n", + "Waiting for responses (attempt 1/5): 100%|██████████| 3/3 [00:00<00:00, 830.39query/s]\n" ] } ], @@ -870,7 +970,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 34, "metadata": {}, "outputs": [ { @@ -886,6 +986,7 @@ " 'max_output_tokens': 64,\n", " 'temperature': 
1,\n", " 'top_k': 40},\n", + " 'timestamp_sent': '24-09-2024-09-14-39',\n", " 'response': 'This is a test response'},\n", " {'id': 10,\n", " 'prompt': ['Can you give me a random number between 1-10?',\n", @@ -897,15 +998,17 @@ " 'max_output_tokens': 128,\n", " 'temperature': 0.5,\n", " 'top_k': 40},\n", + " 'timestamp_sent': '24-09-2024-09-14-40',\n", " 'response': 'This is a test response'},\n", " {'id': 11,\n", " 'prompt': \"How many theaters are there in London's South End?\",\n", " 'api': 'test',\n", " 'model_name': 'test',\n", - " 'response': 'This is a test response'}]" + " 'timestamp_sent': '24-09-2024-09-14-41',\n", + " 'response': 'ValueError - This is a test error which we should handle and return'}]" ] }, - "execution_count": 33, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } From 53869a089e215392f30fadc2c3379dc606495bbf Mon Sep 17 00:00:00 2001 From: rchan Date: Tue, 24 Sep 2024 10:31:17 +0100 Subject: [PATCH 3/7] test experiment methods for csv input and output --- pyproject.toml | 2 +- src/prompto/experiment.py | 13 +- tests/conftest.py | 6 + tests/core/test_experiment.py | 327 +++++++++++++++++++++++++++++++++- 4 files changed, 340 insertions(+), 8 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 84a34151..a652d301 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ huggingface-hub = { version = "^0.23.4", optional = true } quart = { version = "^0.19.6", optional = true } transformers = { version = "^4.44.2", optional = true } torch = { version = "^2.4.1", optional = true } -accelerate = { version = "^0.31.0", optional = true } +accelerate = { version = "^0.34.2", optional = true } aiohttp = { version = "^3.9.5", optional = true } anthropic = { version = "^0.31.1", optional = true } diff --git a/src/prompto/experiment.py b/src/prompto/experiment.py index 6811b35f..4a6f23a7 100644 --- a/src/prompto/experiment.py +++ b/src/prompto/experiment.py @@ -71,7 +71,7 @@ def __init__( ) # read in the experiment data - self._experiment_prompts = self._read_input_file() + self._experiment_prompts = self._read_input_file(self.input_file_path) # set the number of queries self.number_queries: int = len(self._experiment_prompts) @@ -111,11 +111,12 @@ def __init__( def __str__(self) -> str: return self.file_name - def _read_input_file(self) -> list[dict]: - with open(self.input_file_path, "r") as f: - if self.input_file_path.endswith(".jsonl"): + @staticmethod + def _read_input_file(input_file_path) -> list[dict]: + with open(input_file_path, "r") as f: + if input_file_path.endswith(".jsonl"): experiment_prompts: list[dict] = [dict(json.loads(line)) for line in f] - elif self.input_file_path.endswith(".csv"): + elif input_file_path.endswith(".csv"): experiment_prompts: list[dict] = pd.read_csv(f).to_dict( orient="records" ) @@ -801,5 +802,5 @@ def save_completed_responses_to_csv(self, filename: str = None) -> None: if filename is None: filename = self.output_completed_jsonl_file_path.replace(".jsonl", ".csv") - logging.info(f"Saving completed responses (as csv) to {filename}...") + logging.info(f"Saving completed responses as csv to {filename}...") self.completed_responses_dataframe.to_csv(filename, index=False) diff --git a/tests/conftest.py b/tests/conftest.py index f34a519e..0386ac28 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -44,6 +44,7 @@ def temporary_data_folders(tmp_path: Path): ├── media/ ├── .env ├── test.txt + ├── test.csv └── test.jsonl """ # create data folders @@ -58,6 +59,11 @@ def 
temporary_data_folders(tmp_path: Path): with open(Path(tmp_path / "test.txt"), "w") as f: f.write("test line") + # create a csv file in the folder + with open(Path(tmp_path / "test.csv"), "w") as f: + f.write("prompt,api,model_name\n") + f.write("test prompt,test,test_model\n") + # create a jsonl file in the folder with open(Path(tmp_path / "test.jsonl"), "w") as f: f.write( diff --git a/tests/core/test_experiment.py b/tests/core/test_experiment.py index 4604d3a6..c490d1a3 100644 --- a/tests/core/test_experiment.py +++ b/tests/core/test_experiment.py @@ -1,6 +1,9 @@ import logging +import os +import pandas as pd import pytest +import regex as re from prompto.experiment import Experiment from prompto.settings import Settings @@ -19,7 +22,7 @@ def test_experiment_init_errors(temporary_data_folders): with pytest.raises(TypeError, match="missing 1 required positional argument"): Experiment(settings=Settings()) - # passing in a filename that is not a .jsonl file should raise a ValueError + # passing in a filename that is not a .jsonl or a .csv file should raise a ValueError with pytest.raises(ValueError, match="Experiment file must be a jsonl or csv file"): Experiment("test.txt", settings=Settings()) @@ -31,6 +34,43 @@ def test_experiment_init_errors(temporary_data_folders): Experiment("test.jsonl", settings=Settings()) +def test_experiment_read_input_file_jsonl(temporary_data_folders): + # create a jsonl file + with open("test_in_input.jsonl", "w") as f: + f.write( + '{"id": 0, "prompt": "test prompt 0", "api": "test", "model_name": "test_model"}\n' + ) + f.write( + '{"id": 1, "prompt": "test prompt 1", "api": "test", "model_name": "test_model"}\n' + ) + + experiment_prompts = Experiment._read_input_file("test_in_input.jsonl") + assert experiment_prompts == [ + {"id": 0, "prompt": "test prompt 0", "api": "test", "model_name": "test_model"}, + {"id": 1, "prompt": "test prompt 1", "api": "test", "model_name": "test_model"}, + ] + + +def test_experiment_read_input_file_csv(temporary_data_folders): + # create a csv file + with open("test_in_input.csv", "w") as f: + f.write("id,prompt,api,model_name\n") + f.write("0,test prompt 0,test,test_model\n") + f.write("1,test prompt 1,test,test_model\n") + + experiment_prompts = Experiment._read_input_file("test_in_input.csv") + assert experiment_prompts == [ + {"id": 0, "prompt": "test prompt 0", "api": "test", "model_name": "test_model"}, + {"id": 1, "prompt": "test prompt 1", "api": "test", "model_name": "test_model"}, + ] + + +def test_experiment_read_input_file_error(temporary_data_folders): + # passing in a filename that is not a .jsonl or a .csv file should raise a ValueError + with pytest.raises(ValueError, match="Experiment file must be a jsonl or csv file"): + Experiment._read_input_file("test.txt") + + def test_experiment_init(temporary_data_folders): # create a settings object settings = Settings(data_folder="data", max_queries=50, max_attempts=5) @@ -85,6 +125,161 @@ def test_experiment_init(temporary_data_folders): assert experiment._grouped_experiment_prompts == {} assert experiment.completed_responses == [] + assert experiment._completed_responses_dataframe is None + + +def test_completed_responses_dataframe_getter(temporary_data_folders): + # create a settings object + settings = Settings(data_folder="data", max_queries=50, max_attempts=5) + + # create a jsonl file in the input folder (which is created when initialising Settings object) + with open("data/input/test_in_input.jsonl", "w") as f: + f.write( + '{"id": 0, "prompt": "test prompt 
0", "api": "test", "model_name": "test_model"}\n' + ) + f.write( + '{"id": 1, "prompt": "test prompt 1", "api": "test", "model_name": "test_model"}\n' + ) + + # create an experiment object + experiment = Experiment("test_in_input.jsonl", settings=settings) + + # if experiment hasn't been ran yet, the dataframe should be None + assert experiment._completed_responses_dataframe is None + + # if trying to obtain it without first running experiment, it should raise an error + with pytest.raises( + ValueError, + match=re.escape( + "No completed responses to convert to a DataFrame " + "(completed_responses attribute is empty). " + "Run the process method to obtain the completed responses" + ), + ): + experiment.completed_responses_dataframe + + # we will set the completed_responses attribute to a list of dictionaries + # and then check that the dataframe is created correctly + experiment.completed_responses = [ + { + "id": 0, + "prompt": "test prompt 0", + "api": "test", + "model_name": "test_model", + "response": "response 0", + }, + { + "id": 1, + "prompt": "test prompt 1", + "api": "test", + "model_name": "test_model", + "response": "response 1", + }, + ] + + # check the dataframe is created correctly when calling the getter + assert isinstance(experiment.completed_responses_dataframe, pd.DataFrame) + assert experiment.completed_responses_dataframe.equals( + experiment._completed_responses_dataframe + ) + assert experiment.completed_responses_dataframe.equals( + pd.DataFrame( + { + "id": [0, 1], + "prompt": ["test prompt 0", "test prompt 1"], + "api": ["test", "test"], + "model_name": ["test_model", "test_model"], + "response": ["response 0", "response 1"], + } + ) + ) + + +def test_completed_responses_dataframe_getter_different_keys(temporary_data_folders): + # create a settings object + settings = Settings(data_folder="data", max_queries=50, max_attempts=5) + + # create a jsonl file in the input folder (which is created when initialising Settings object) + with open("data/input/test_in_input.jsonl", "w") as f: + f.write( + '{"id": 0, "prompt": "test prompt 0", "api": "test", "model_name": "test_model"}\n' + ) + f.write( + '{"id": 1, "prompt": "test prompt 1", "api": "test", "model_name": "test_model"}\n' + ) + + # create an experiment object + experiment = Experiment("test_in_input.jsonl", settings=settings) + + # if experiment hasn't been ran yet, the dataframe should be None + assert experiment._completed_responses_dataframe is None + + # if trying to obtain it without first running experiment, it should raise an error + with pytest.raises( + ValueError, + match=re.escape( + "No completed responses to convert to a DataFrame " + "(completed_responses attribute is empty). 
" + "Run the process method to obtain the completed responses" + ), + ): + experiment.completed_responses_dataframe + + # we will set the completed_responses attribute to a list of dictionaries + # and then check that the dataframe is created correctly + experiment.completed_responses = [ + { + "id": 0, + "prompt": "test prompt 0", + "api": "test", + "model_name": "test_model", + "response": "response 0", + }, + { + "id": 1, + "prompt": "test prompt 1", + "api": "test", + "model_name": "test_model", + "response": "response 1", + "extra_key": "extra_value", + }, + ] + + # check the dataframe is created correctly when calling the getter + assert isinstance(experiment.completed_responses_dataframe, pd.DataFrame) + assert experiment.completed_responses_dataframe.equals( + experiment._completed_responses_dataframe + ) + assert experiment.completed_responses_dataframe.equals( + pd.DataFrame( + { + "id": [0, 1], + "prompt": ["test prompt 0", "test prompt 1"], + "api": ["test", "test"], + "model_name": ["test_model", "test_model"], + "response": ["response 0", "response 1"], + "extra_key": [None, "extra_value"], + } + ) + ) + + +def test_completed_responses_dataframe_setter(temporary_data_folders): + # raise an error if trying to set the completed_responses_dataframe attribute + settings = Settings(data_folder="data", max_queries=50, max_attempts=5) + with open("data/input/test_in_input.jsonl", "w") as f: + f.write( + '{"id": 0, "prompt": "test prompt 0", "api": "test", "model_name": "test_model"}\n' + ) + f.write( + '{"id": 1, "prompt": "test prompt 1", "api": "test", "model_name": "test_model"}\n' + ) + + experiment = Experiment("test_in_input.jsonl", settings=settings) + with pytest.raises( + AttributeError, match="Cannot set the completed_responses_dataframe attribute" + ): + experiment.completed_responses_dataframe = pd.DataFrame() def test_experiment_grouped_prompts_simple(temporary_data_folders, caplog): @@ -2740,3 +2935,133 @@ def test_rate_limit_docs_example_6(temporary_rate_limit_doc_examples): "gemini": "4 queries at 5 queries per minute", "openai": "4 queries at 5 queries per minute", } + + +def test_obtain_completed_responses_dataframe(temporary_data_folders): + settings = Settings(data_folder="data", max_queries=50, max_attempts=5) + with open("data/input/test_in_input.jsonl", "w") as f: + f.write( + '{"id": 0, "prompt": "test prompt 0", "api": "test", "model_name": "test_model"}\n' + ) + f.write( + '{"id": 1, "prompt": "test prompt 1", "api": "test", "model_name": "test_model"}\n' + ) + + experiment = Experiment("test_in_input.jsonl", settings=settings) + + # if experiment note run, calling this method should raise an error + with pytest.raises( + ValueError, + match=re.escape( + "No completed responses to convert to a DataFrame " + "(completed_responses attribute is empty). 
" + "Run the process method to obtain the completed responses" + ), + ): + experiment._obtain_completed_responses_dataframe() + + # we will set the completed_responses attribute to a list of dictionaries + # and then check that the dataframe is created correctly + experiment.completed_responses = [ + { + "id": 0, + "prompt": "test prompt 0", + "api": "test", + "model_name": "test_model", + "response": "response 0", + }, + { + "id": 1, + "prompt": "test prompt 1", + "api": "test", + "model_name": "test_model", + "response": "response 1", + }, + ] + + # check the dataframe is created correctly when calling the getter + assert isinstance(experiment._obtain_completed_responses_dataframe(), pd.DataFrame) + assert experiment._obtain_completed_responses_dataframe().equals( + pd.DataFrame( + { + "id": [0, 1], + "prompt": ["test prompt 0", "test prompt 1"], + "api": ["test", "test"], + "model_name": ["test_model", "test_model"], + "response": ["response 0", "response 1"], + } + ) + ) + + +def test_save_completed_responses_to_csv(temporary_data_folders, caplog): + caplog.set_level(logging.INFO) + + settings = Settings(data_folder="data", max_queries=50, max_attempts=5) + with open("data/input/test_in_input.jsonl", "w") as f: + f.write( + '{"id": 0, "prompt": "test prompt 0", "api": "test", "model_name": "test_model"}\n' + ) + f.write( + '{"id": 1, "prompt": "test prompt 1", "api": "test", "model_name": "test_model"}\n' + ) + + experiment = Experiment("test_in_input.jsonl", settings=settings) + + # we will set the completed_responses attribute to a list of dictionaries + # and then check that the dataframe is created correctly + experiment.completed_responses = [ + { + "id": 0, + "prompt": "test prompt 0", + "api": "test", + "model_name": "test_model", + "response": "response 0", + }, + { + "id": 1, + "prompt": "test prompt 1", + "api": "test", + "model_name": "test_model", + "response": "response 1", + }, + ] + + # save the completed responses to a csv file + experiment.save_completed_responses_to_csv("test_out.csv") + + # check the csv file is created correctly + assert os.path.exists("test_out.csv") + + # check the csv file content + expected = pd.DataFrame( + { + "id": [0, 1], + "prompt": ["test prompt 0", "test prompt 1"], + "api": ["test", "test"], + "model_name": ["test_model", "test_model"], + "response": ["response 0", "response 1"], + } + ) + loaded_csv = pd.read_csv("test_out.csv") + assert loaded_csv.equals(expected) + + # check logs + assert "Saving completed responses as csv to test_out.csv" in caplog.text + + # save the completed responses to a csv file without specifying the file name + os.makedirs("data/output/test_in_input/", exist_ok=True) + experiment.save_completed_responses_to_csv() + + # check the csv file is created correctly + filename = ( + f"data/output/test_in_input/{experiment.start_time}-completed-test_in_input.csv" + ) + assert os.path.exists(filename) + + # check the csv file content + loaded_csv = pd.read_csv(filename) + assert loaded_csv.equals(expected) + + # check logs + assert f"Saving completed responses as csv to {filename}" in caplog.text From 002aa4f8c76f59ea6e67b450cbc9af37c989d541 Mon Sep 17 00:00:00 2001 From: rchan Date: Tue, 24 Sep 2024 11:51:53 +0100 Subject: [PATCH 4/7] add logic for case where parameters are in csv --- src/prompto/experiment.py | 39 ++++++++- tests/core/test_experiment.py | 146 +++++++++++++++++++++++++++++++++- 2 files changed, 180 insertions(+), 5 deletions(-) diff --git a/src/prompto/experiment.py b/src/prompto/experiment.py index 
4a6f23a7..8845cc70 100644 --- a/src/prompto/experiment.py +++ b/src/prompto/experiment.py @@ -115,11 +115,35 @@ def __str__(self) -> str: def _read_input_file(input_file_path) -> list[dict]: with open(input_file_path, "r") as f: if input_file_path.endswith(".jsonl"): + logging.info( + f"Loading experiment prompts from jsonl file {input_file_path}..." + ) experiment_prompts: list[dict] = [dict(json.loads(line)) for line in f] elif input_file_path.endswith(".csv"): - experiment_prompts: list[dict] = pd.read_csv(f).to_dict( - orient="records" + logging.info( + f"Loading experiment prompts from csv file {input_file_path}..." ) + loaded_df = pd.read_csv(f) + parameters_col_names = [ + col for col in loaded_df.columns if "parameters-" in col + ] + if len(parameters_col_names) > 0: + # take the "parameters-" column names and create new column "parameters" + # with the values as a dictionary of the parameters + logging.info(f"Found parameters columns: {parameters_col_names}") + loaded_df["parameters"] = [ + { + parameter.removeprefix("parameters-"): row[parameter] + for parameter in parameters_col_names + if not pd.isna(row[parameter]) + } + for _, row in tqdm( + loaded_df.iterrows(), + desc="Parsing parameters columns for data frame", + unit="row", + ) + ] + experiment_prompts: list[dict] = loaded_df.to_dict(orient="records") else: raise ValueError("Experiment file must be a jsonl or csv file") @@ -803,4 +827,13 @@ def save_completed_responses_to_csv(self, filename: str = None) -> None: filename = self.output_completed_jsonl_file_path.replace(".jsonl", ".csv") logging.info(f"Saving completed responses as csv to {filename}...") - self.completed_responses_dataframe.to_csv(filename, index=False) + if "parameters" in self.completed_responses_dataframe.columns: + # make a copy and convert the parameters column (which should be of dict type) to a json string + completed_responses_dataframe = self.completed_responses_dataframe.copy() + completed_responses_dataframe["parameters"] = completed_responses_dataframe[ + "parameters" + ].apply(json.dumps) + else: + completed_responses_dataframe = self.completed_responses_dataframe + + completed_responses_dataframe.to_csv(filename, index=False) diff --git a/tests/core/test_experiment.py b/tests/core/test_experiment.py index c490d1a3..e89fb448 100644 --- a/tests/core/test_experiment.py +++ b/tests/core/test_experiment.py @@ -34,7 +34,8 @@ def test_experiment_init_errors(temporary_data_folders): Experiment("test.jsonl", settings=Settings()) -def test_experiment_read_input_file_jsonl(temporary_data_folders): +def test_experiment_read_input_file_jsonl(temporary_data_folders, caplog): + caplog.set_level(logging.INFO) # create a jsonl file with open("test_in_input.jsonl", "w") as f: f.write( @@ -49,9 +50,13 @@ def test_experiment_read_input_file_jsonl(temporary_data_folders): {"id": 0, "prompt": "test prompt 0", "api": "test", "model_name": "test_model"}, {"id": 1, "prompt": "test prompt 1", "api": "test", "model_name": "test_model"}, ] + assert ( + "Loading experiment prompts from jsonl file test_in_input.jsonl" in caplog.text + ) -def test_experiment_read_input_file_csv(temporary_data_folders): +def test_experiment_read_input_file_csv(temporary_data_folders, caplog): + caplog.set_level(logging.INFO) # create a csv file with open("test_in_input.csv", "w") as f: f.write("id,prompt,api,model_name\n") @@ -63,6 +68,62 @@ def test_experiment_read_input_file_csv(temporary_data_folders): {"id": 0, "prompt": "test prompt 0", "api": "test", "model_name": "test_model"}, {"id": 
1, "prompt": "test prompt 1", "api": "test", "model_name": "test_model"}, ] + assert "Loading experiment prompts from csv file test_in_input.csv" in caplog.text + + +def test_experiment_read_input_file_csv_with_parameters(temporary_data_folders, caplog): + caplog.set_level(logging.INFO) + # create a csv file + with open("test_in_input.csv", "w") as f: + f.write( + "id,prompt,api,model_name,parameters-temperature,parameters-max-output-tokens\n" + ) + f.write("0,test prompt 0,test,test_model,0.9,100\n") + f.write("1,test prompt 1,test,test_model,None,100\n") + f.write("2,test prompt 2,test,test_model,,100\n") + + experiment_prompts = Experiment._read_input_file("test_in_input.csv") + + # a hack to compare the dictionaries without worrying about the NaN values + # NaNs should occur in experiment_prompts[1]["parameters-temperature"] and experiment_prompts[2]["parameters-temperature"] + assert pd.isna(experiment_prompts[1]["parameters-temperature"]) + assert pd.isna(experiment_prompts[2]["parameters-temperature"]) + # remove them for now + experiment_prompts[1].pop("parameters-temperature") + experiment_prompts[2].pop("parameters-temperature") + + assert experiment_prompts == [ + { + "id": 0, + "prompt": "test prompt 0", + "api": "test", + "model_name": "test_model", + "parameters-temperature": 0.9, + "parameters-max-output-tokens": 100, + "parameters": {"temperature": 0.9, "max-output-tokens": 100}, + }, + { + "id": 1, + "prompt": "test prompt 1", + "api": "test", + "model_name": "test_model", + "parameters-max-output-tokens": 100, + "parameters": {"max-output-tokens": 100}, + }, + { + "id": 2, + "prompt": "test prompt 2", + "api": "test", + "model_name": "test_model", + "parameters-max-output-tokens": 100, + "parameters": {"max-output-tokens": 100}, + }, + ] + assert ( + "Found parameters columns: ['parameters-temperature', 'parameters-max-output-tokens']" + in caplog.text + ) + assert "Loading experiment prompts from csv file test_in_input.csv" in caplog.text def test_experiment_read_input_file_error(temporary_data_folders): @@ -3065,3 +3126,84 @@ def test_save_completed_responses_to_csv(temporary_data_folders, caplog): # check logs assert f"Saving completed responses as csv to {filename}" in caplog.text + + +def test_save_completed_responses_to_csv_with_parameters( + temporary_data_folders, caplog +): + caplog.set_level(logging.INFO) + + settings = Settings(data_folder="data", max_queries=50, max_attempts=5) + with open("data/input/test_in_input.jsonl", "w") as f: + f.write( + '{"id": 0, "prompt": "test prompt 0", "api": "test", "model_name": "test_model"}\n' + ) + f.write( + '{"id": 1, "prompt": "test prompt 1", "api": "test", "model_name": "test_model"}\n' + ) + + experiment = Experiment("test_in_input.jsonl", settings=settings) + + # we will set the completed_responses attribute to a list of dictionaries + # and then check that the dataframe is created correctly + experiment.completed_responses = [ + { + "id": 0, + "prompt": "test prompt 0", + "api": "test", + "model_name": "test_model", + "response": "response 0", + "parameters": {"temperature": 0.5, "max_tokens": 100}, + }, + { + "id": 1, + "prompt": "test prompt 1", + "api": "test", + "model_name": "test_model", + "response": "response 1", + "parameters": {"max_tokens": 100}, + }, + ] + + # save the completed responses to a csv file + experiment.save_completed_responses_to_csv("test_out.csv") + + # check the csv file is created correctly + assert os.path.exists("test_out.csv") + + # check the csv file content + expected = 
pd.DataFrame( + { + "id": [0, 1], + "prompt": ["test prompt 0", "test prompt 1"], + "api": ["test", "test"], + "model_name": ["test_model", "test_model"], + "response": ["response 0", "response 1"], + "parameters": [ + '{"temperature": 0.5, "max_tokens": 100}', + '{"max_tokens": 100}', + ], + } + ) + loaded_csv = pd.read_csv("test_out.csv") + assert loaded_csv.equals(expected) + + # check logs + assert "Saving completed responses as csv to test_out.csv" in caplog.text + + # save the completed responses to a csv file without specifying the file name + os.makedirs("data/output/test_in_input/", exist_ok=True) + experiment.save_completed_responses_to_csv() + + # check the csv file is created correctly + filename = ( + f"data/output/test_in_input/{experiment.start_time}-completed-test_in_input.csv" + ) + assert os.path.exists(filename) + + # check the csv file content + loaded_csv = pd.read_csv(filename) + assert loaded_csv.equals(expected) + + # check logs + assert f"Saving completed responses as csv to {filename}" in caplog.text From 20a7edabc10d1a81c1b0643932764f706540f8e9 Mon Sep 17 00:00:00 2001 From: rchan Date: Tue, 24 Sep 2024 13:01:00 +0100 Subject: [PATCH 5/7] test experiment processing with csv input --- src/prompto/apis/testing/testing_api.py | 4 +- src/prompto/experiment.py | 17 ++- tests/conftest.py | 13 +++ tests/core/test_experiment.py | 88 +++++++++++++++ tests/core/test_experiment_process.py | 141 +++++++++++++++++++++++- 5 files changed, 254 insertions(+), 9 deletions(-) diff --git a/src/prompto/apis/testing/testing_api.py b/src/prompto/apis/testing/testing_api.py index 7d40f627..8cb37174 100644 --- a/src/prompto/apis/testing/testing_api.py +++ b/src/prompto/apis/testing/testing_api.py @@ -33,9 +33,9 @@ async def query(self, prompt_dict: dict, index: int | str) -> dict: raise_error_option = generation_config.get("raise_error", "") raise_error_type = generation_config.get("raise_error_type", "") - if raise_error_option == "True": + if raise_error_option.lower() in ["true", "yes"]: raise_error = True - elif raise_error_option == "False": + elif raise_error_option.lower() in ["false", "no"]: raise_error = False else: raise_error = random.randint(1, 5) == 1 diff --git a/src/prompto/experiment.py b/src/prompto/experiment.py index 8845cc70..6c76efff 100644 --- a/src/prompto/experiment.py +++ b/src/prompto/experiment.py @@ -50,7 +50,9 @@ def __init__( self.file_name: str = file_name # obtain experiment name from file name - self.experiment_name: str = self.file_name.removesuffix(".jsonl") + self.experiment_name: str = self.file_name.removesuffix(".jsonl").removesuffix( + ".csv" + ) # settings for the pipeline which includes input, output, and media folder locations self.settings: Settings = settings # experiment output folder is a subfolder of the output folder @@ -112,7 +114,7 @@ def __str__(self) -> str: return self.file_name @staticmethod - def _read_input_file(input_file_path) -> list[dict]: + def _read_input_file(input_file_path: str) -> list[dict]: with open(input_file_path, "r") as f: if input_file_path.endswith(".jsonl"): logging.info( @@ -648,6 +650,10 @@ async def query_model_and_record_response( ) if index is None: index = "NA" + id = prompt_dict.get("id", "NA") + # if id is NaN, set it to "NA" + if pd.isna(id): + id = "NA" # query the API timeout_seconds = 300 @@ -669,8 +675,7 @@ async def query_model_and_record_response( ) as err: # don't retry for selected errors, log the error and save an error response log_message = ( - f"Error (i={index}, id={prompt_dict.get('id', 
'NA')}): " - f"{type(err).__name__} - {err}" + f"Error (i={index}, id={id}): " f"{type(err).__name__} - {err}" ) async with FILE_WRITE_LOCK: write_log_message( @@ -683,7 +688,7 @@ async def query_model_and_record_response( if attempt == self.settings.max_attempts: # we've already tried max_attempts times, so log the error and save an error response log_message = ( - f"Error (i={index}, id={prompt_dict.get('id', 'NA')}) " + f"Error (i={index}, id={id}) " f"after maximum {self.settings.max_attempts} attempts: " f"{type(err).__name__} - {err}" ) @@ -701,7 +706,7 @@ async def query_model_and_record_response( else: # we haven't tried max_attempts times yet, so log the error and return an Exception log_message = ( - f"Error (i={index}, id={prompt_dict.get('id', 'NA')}) on attempt " + f"Error (i={index}, id={id}) on attempt " f"{attempt} of {self.settings.max_attempts}: " f"{type(err).__name__} - {err}. Adding to the queue to try again later..." ) diff --git a/tests/conftest.py b/tests/conftest.py index 0386ac28..9fb16eae 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -449,6 +449,7 @@ def temporary_data_folder_for_processing(tmp_path: Path): tmp_path ├── data/ ├── input/ + ├── test_experiment.csv ├── test_experiment.jsonl ├── test_experiment_with_groups.jsonl └── test_experiment_eval.jsonl @@ -463,6 +464,18 @@ def temporary_data_folder_for_processing(tmp_path: Path): Path(tmp_path / "data" / "output").mkdir() Path(tmp_path / "data" / "media").mkdir() + # create a csv file with a few prompts + with open(Path(tmp_path / "data" / "input" / "test_experiment.csv"), "w") as f: + f.write( + "id,prompt,api,model_name,parameters-raise_error,parameters-raise_error_type\n" + ) + f.write('0,test prompt 1,test,model1,"no",\n') + f.write(',test prompt 2,test,model1,"yes","Exception"\n') + f.write('1,test prompt 3,test,model1,"yes",\n') + f.write('2,test prompt 4,test,model2,"no",\n') + f.write('3,test prompt 5,test,model2,"no",\n') + f.write('4,test prompt 6,test,model2,"no",\n') + # create a file with larger number of prompts with different APIs, models with no groups with open(Path(tmp_path / "data" / "input" / "test_experiment.jsonl"), "w") as f: f.write( diff --git a/tests/core/test_experiment.py b/tests/core/test_experiment.py index e89fb448..3a6d38a4 100644 --- a/tests/core/test_experiment.py +++ b/tests/core/test_experiment.py @@ -189,6 +189,94 @@ def test_experiment_init(temporary_data_folders): assert experiment._completed_responses_dataframe is None +def test_experiment_init_csv(temporary_data_folders): + # create a settings object + settings = Settings(data_folder="data", max_queries=50, max_attempts=5) + + # create a csv file in the input folder (which is created when initialising Settings object) + with open("data/input/test_in_input.csv", "w") as f: + f.write( + "id,prompt,api,model_name,parameters-temperature,parameters-max-output-tokens\n" + ) + f.write("0,test prompt 0,test,test_model,0.9,100\n") + f.write("1,test prompt 1,test,test_model,0.5,100\n") + + # create an experiment object + experiment = Experiment("test_in_input.csv", settings=settings) + + # check the experiment object has the correct attributes + assert experiment.file_name == "test_in_input.csv" + assert experiment.experiment_name == "test_in_input" + assert experiment.settings == settings + assert experiment.output_folder == "data/output/test_in_input" + assert experiment.input_file_path == "data/input/test_in_input.csv" + assert isinstance(experiment.creation_time, str) + assert isinstance(experiment.start_time, 
str) + assert ( + experiment.output_completed_jsonl_file_path + == f"data/output/test_in_input/{experiment.start_time}-completed-test_in_input.jsonl" + ) + assert ( + experiment.output_input_jsonl_file_out_path + == f"data/output/test_in_input/{experiment.start_time}-input-test_in_input.jsonl" + ) + assert experiment._experiment_prompts == [ + { + "id": 0, + "prompt": "test prompt 0", + "api": "test", + "model_name": "test_model", + "parameters-temperature": 0.9, + "parameters-max-output-tokens": 100, + "parameters": {"temperature": 0.9, "max-output-tokens": 100}, + }, + { + "id": 1, + "prompt": "test prompt 1", + "api": "test", + "model_name": "test_model", + "parameters-temperature": 0.5, + "parameters-max-output-tokens": 100, + "parameters": {"temperature": 0.5, "max-output-tokens": 100}, + }, + ] + # check property getter for experiment_prompts + assert experiment.experiment_prompts == [ + { + "id": 0, + "prompt": "test prompt 0", + "api": "test", + "model_name": "test_model", + "parameters-temperature": 0.9, + "parameters-max-output-tokens": 100, + "parameters": {"temperature": 0.9, "max-output-tokens": 100}, + }, + { + "id": 1, + "prompt": "test prompt 1", + "api": "test", + "model_name": "test_model", + "parameters-temperature": 0.5, + "parameters-max-output-tokens": 100, + "parameters": {"temperature": 0.5, "max-output-tokens": 100}, + }, + ] + assert experiment.number_queries == 2 + assert ( + experiment.log_file + == f"data/output/test_in_input/{experiment.start_time}-log-test_in_input.txt" + ) + + # test str method + assert str(experiment) == "test_in_input.csv" + + # test that grouped experiments have not been created yet + assert experiment._grouped_experiment_prompts == {} + + assert experiment.completed_responses == [] + assert experiment._completed_responses_dataframe is None + + def test_completed_responses_dataframe_getter(temporary_data_folders): # create a settings object settings = Settings(data_folder="data", max_queries=50, max_attempts=5) diff --git a/tests/core/test_experiment_process.py b/tests/core/test_experiment_process.py index 90708bdb..ccfaf736 100644 --- a/tests/core/test_experiment_process.py +++ b/tests/core/test_experiment_process.py @@ -152,6 +152,146 @@ async def test_process( assert log_msg in caplog.text +@pytest.mark.asyncio +async def test_process_using_csv( + temporary_data_folder_for_processing: None, + caplog: pytest.LogCaptureFixture, + capsys: pytest.CaptureFixture[str], +): + caplog.set_level(logging.INFO) + settings = Settings(data_folder="data", max_attempts=2, max_queries=200) + experiment = Experiment("test_experiment.csv", settings=settings) + + assert experiment.completed_responses == [] + assert not os.path.isdir(experiment.output_folder) + + result, avg_query_proc_time = await experiment.process() + + # assert that the output folder was created and input file was moved to it + assert os.path.isdir(experiment.output_folder) + assert not os.path.isfile("data/input/test_experiment.csv") + assert len(os.listdir("data/output/test_experiment")) == 4 + # assert created files in output + assert os.path.isfile( + f"data/output/test_experiment/{experiment.start_time}-input-test_experiment.csv" + ) + assert os.path.isfile( + f"data/output/test_experiment/{experiment.start_time}-completed-test_experiment.jsonl" + ) + assert os.path.isfile( + f"data/output/test_experiment/{experiment.start_time}-input-test_experiment.jsonl" + ) + assert os.path.isfile( + f"data/output/test_experiment/{experiment.start_time}-log-test_experiment.txt" + ) + + # check 
processing time + assert isinstance(avg_query_proc_time, float) + assert avg_query_proc_time > 0 + + # check result + assert len(result) == 6 + assert experiment.completed_responses == result + + # check that the response is saved to the output file + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: + responses = [dict(json.loads(line)) for line in f] + + assert responses == result + + # check the content printed to the console (tqdm progress bar) + captured = capsys.readouterr() + print_msg = "Sending 6 queries at 200 QPM with RI of 0.3s (attempt 1/2)" + assert print_msg in captured.err + print_msg = "Waiting for responses (attempt 1/2)" + assert print_msg in captured.err + print_msg = "Sending 1 queries at 200 QPM with RI of 0.3s (attempt 2/2)" + assert print_msg in captured.err + print_msg = "Waiting for responses (attempt 2/2)" + assert print_msg in captured.err + + # check log messages + log_msg = "Processing experiment: test_experiment.csv.." + assert log_msg in caplog.text + log_msg = ( + "Moving data/input/test_experiment.csv to " + "data/output/test_experiment as " + "data/output/test_experiment/" + f"{experiment.start_time}-input-test_experiment.csv" + ) + assert log_msg in caplog.text + log_msg = ( + "Converting data/input/test_experiment.csv to jsonl file for processing..." + ) + assert log_msg in caplog.text + log_msg = ( + "Moving data/input/test_experiment.jsonl to " + "data/output/test_experiment as " + "data/output/test_experiment/" + f"{experiment.start_time}-input-test_experiment.jsonl" + ) + log_msg = "Sending 6 queries..." + assert log_msg in caplog.text + log_msg = ( + "Response received for model test (i=1, id=0.0)\n" + "Prompt: test prompt 1...\n" + "Response: This is a test response...\n" + ) + assert log_msg in caplog.text + log_msg = ( + "Error (i=2, id=NA) on attempt 1 of 2: " + "Exception - This is a test error which we should handle and return. " + "Adding to the queue to try again later..." + ) + assert log_msg in caplog.text + log_msg = "Error (i=3, id=1.0): ValueError - This is a test error which we should handle and return" + assert log_msg in caplog.text + log_msg = ( + "Error with model test (i=3, id=1.0)\n" + "Prompt: test prompt 3...\n" + "Error: This is a test error which we should handle and return\n" + ) + log_msg = ( + "Response received for model test (i=4, id=2.0)\n" + "Prompt: test prompt 4...\n" + "Response: This is a test response...\n" + ) + assert log_msg in caplog.text + log_msg = ( + "Response received for model test (i=5, id=3.0)\n" + "Prompt: test prompt 5...\n" + "Response: This is a test response...\n" + ) + assert log_msg in caplog.text + log_msg = ( + "Response received for model test (i=6, id=4.0)\n" + "Prompt: test prompt 6...\n" + "Response: This is a test response...\n" + ) + assert log_msg in caplog.text + log_msg = "Retrying 1 failed queries - attempt 2 of 2..." + assert log_msg in caplog.text + log_msg = ( + "Error (i=1, id=NA) after maximum 2 attempts: " + "Exception - This is a test error which we should handle and return" + ) + assert log_msg in caplog.text + log_msg = ( + "Error with model test (i=1, id=NA)\n" + "Prompt: test prompt 2...\n" + "Error: This is a test error which we should handle and return\n" + ) + log_msg = "Maximum attempts reached. Exiting..." + assert log_msg in caplog.text + log_msg = "Completed experiment: test_experiment.csv! 
" + assert log_msg in caplog.text + log_msg = "Experiment processing time: " + assert log_msg in caplog.text + log_msg = "Average time per query: " + assert log_msg in caplog.text + + @pytest.mark.asyncio async def test_process_with_max_queries_dict( temporary_data_folder_for_processing: None, caplog, capsys @@ -777,6 +917,5 @@ async def test_process_with_evaluation( # check that the evaluation function has been applied assert responses == result - print(responses) assert all(["evaluation" in response for response in responses]) assert all([response["evaluation"] is True for response in responses]) From fbb36d4963fda8423657c0f3e76bbf4d4379ee15 Mon Sep 17 00:00:00 2001 From: rchan Date: Tue, 24 Sep 2024 13:35:03 +0100 Subject: [PATCH 6/7] test run_experiment command with csv input and output --- tests/conftest.py | 18 +++-- tests/scripts/test_run_experiment.py | 107 +++++++++++++++++++++++++++ 2 files changed, 119 insertions(+), 6 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 9fb16eae..894d126c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -524,10 +524,10 @@ def temporary_data_folder_for_processing(tmp_path: Path): Path(tmp_path / "data" / "input" / "test_experiment_eval.jsonl"), "w" ) as f: f.write( - '{"id": 0, "api": "test", "model1": "test_model", "prompt": "test prompt 1", "parameters": {"raise_error": "False"}}\n' + '{"id": 0, "api": "test", "model_name": "model1", "prompt": "test prompt 1", "parameters": {"raise_error": "False"}}\n' ) f.write( - '{"id": 1, "api": "test", "model2": "test_model", "prompt": "test prompt 2", "parameters": {"raise_error": "False"}}\n' + '{"id": 1, "api": "test", "model_name": "model2", "prompt": "test prompt 2", "parameters": {"raise_error": "False"}}\n' ) # create a file with max queries dictionary @@ -583,19 +583,25 @@ def temporary_data_folder_judge(tmp_path: Path): # create input experiment file not in input folder with open(Path(tmp_path / "test-exp-not-in-input.jsonl"), "w") as f: f.write( - '{"id": 0, "api": "test", "model1": "test_model", "prompt": "test prompt 1", "parameters": {"raise_error": "False"}}\n' + '{"id": 0, "api": "test", "model_name": "model1", "prompt": "test prompt 1", "parameters": {"raise_error": "False"}}\n' ) f.write( - '{"id": 1, "api": "test", "model2": "test_model", "prompt": "test prompt 2", "parameters": {"raise_error": "False"}}\n' + '{"id": 1, "api": "test", "model_name": "model2", "prompt": "test prompt 2", "parameters": {"raise_error": "False"}}\n' ) + # create input csv file in input folder + with open(Path(tmp_path / "data" / "input" / "test-experiment.csv"), "w") as f: + f.write("id,prompt,api,model_name,parameters-raise_error,expected_response\n") + f.write('0,test prompt 1,test,model1,"no","This is a test response"\n') + f.write('1,test prompt 2,test,model2,"no","something else"\n') + # create input experiment file in input folder with open(Path(tmp_path / "data" / "input" / "test-experiment.jsonl"), "w") as f: f.write( - '{"id": 0, "api": "test", "model1": "test_model", "prompt": "test prompt 1", "parameters": {"raise_error": "False"}, "expected_response": "This is a test response"}\n' + '{"id": 0, "api": "test", "model_name": "model1", "prompt": "test prompt 1", "parameters": {"raise_error": "False"}, "expected_response": "This is a test response"}\n' ) f.write( - '{"id": 1, "api": "test", "model2": "test_model", "prompt": "test prompt 2", "parameters": {"raise_error": "False"}, "expected_response": "something else"}\n' + '{"id": 1, "api": "test", "model_name": "model2", 
"prompt": "test prompt 2", "parameters": {"raise_error": "False"}, "expected_response": "something else"}\n' ) # create a completed experiment file with "response" key in output folder diff --git a/tests/scripts/test_run_experiment.py b/tests/scripts/test_run_experiment.py index 267348a5..e6dc1f33 100644 --- a/tests/scripts/test_run_experiment.py +++ b/tests/scripts/test_run_experiment.py @@ -766,3 +766,110 @@ def test_run_experiment_judge_and_scorer(temporary_data_folder_judge): assert response["input-includes"] is False else: assert False + + +def test_run_experiment_judge_and_scorer_with_csv_input_and_output( + temporary_data_folder_judge, +): + result = shell( + "prompto_run_experiment " + "--file data/input/test-experiment.csv " + "--max-queries=200 " + "--judge-folder judge_loc " + "--templates template.txt,template2.txt " + "--judge judge2 " + "--scorer 'match, includes' " + "--output-as-csv" + ) + assert result.exit_code == 0 + assert "No environment file found at .env" in result.stderr + assert "Judge folder loaded from judge_loc" in result.stderr + assert "Templates to be used: ['template.txt', 'template2.txt']" in result.stderr + assert "Judges to be used: ['judge2']" in result.stderr + assert "Scoring functions to be used: ['match', 'includes']" in result.stderr + assert ( + "Settings: " + "data_folder=data, " + "max_queries=200, " + "max_attempts=5, " + "parallel=False\n" + "Subfolders: " + "input_folder=data/input, " + "output_folder=data/output, " + "media_folder=data/media" + ) in result.stderr + assert ( + "Starting processing experiment: data/input/test-experiment.csv..." + in result.stderr + ) + assert "Completed experiment: test-experiment.csv" in result.stderr + assert ( + "Starting processing judge of experiment: judge-test-experiment.jsonl..." + in result.stderr + ) + assert "Completed experiment: judge-test-experiment.jsonl" in result.stderr + assert "Experiment processed successfully!" 
in result.stderr + assert os.path.isdir("data/output/test-experiment") + assert os.path.isdir("data/output/judge-test-experiment") + + # check the output files for the test-experiment + completed_files = [ + x for x in os.listdir("data/output/test-experiment") if "completed" in x + ] + # should be 2 (one jsonl and one csv) + assert len(completed_files) == 2 + completed_jsonl_file = [ + file for file in completed_files if file.endswith(".jsonl") + ][0] + completed_csv_files = [file for file in completed_files if file.endswith(".csv")] + assert len(completed_csv_files) == 1 + + # load the output to check the scores have been added + with open(f"data/output/test-experiment/{completed_jsonl_file}", "r") as f: + responses = [dict(json.loads(line)) for line in f] + + # test that the scorers got added to the completed file + assert len(responses) == 2 + for response in responses: + if response["id"] == 0: + assert response["match"] is True + assert response["includes"] is True + elif response["id"] == 1: + assert response["match"] is False + assert response["includes"] is False + else: + assert False + + # check the output files for the judge-test-experiment + completed_files = [ + x for x in os.listdir("data/output/judge-test-experiment") if "completed" in x + ] + # should be 2 (one jsonl and one csv) + assert len(completed_files) == 2 + completed_jsonl_file = [ + file for file in completed_files if file.endswith(".jsonl") + ][0] + completed_csv_files = [file for file in completed_files if file.endswith(".csv")] + assert len(completed_csv_files) == 1 + + # load the output to check the scores have been added + with open(f"data/output/judge-test-experiment/{completed_jsonl_file}", "r") as f: + responses = [dict(json.loads(line)) for line in f] + + # test that the scorers got added to the completed judge file + assert len(responses) == 4 + for response in responses: + if response["id"] == "judge-judge2-template-0": + assert response["input-match"] is True + assert response["input-includes"] is True + elif response["id"] == "judge-judge2-template-1": + assert response["input-match"] is False + assert response["input-includes"] is False + elif response["id"] == "judge-judge2-template2-0": + assert response["input-match"] is True + assert response["input-includes"] is True + elif response["id"] == "judge-judge2-template2-1": + assert response["input-match"] is False + assert response["input-includes"] is False + else: + assert False From 3398e4016624d62920a547ba641c13411452682f Mon Sep 17 00:00:00 2001 From: rchan Date: Tue, 24 Sep 2024 13:47:14 +0100 Subject: [PATCH 7/7] bump version to 0.2.0 and update docs --- README.md | 2 +- docs/experiment_file.md | 20 ++++++++++++++++++++ pyproject.toml | 2 +- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c940e1b1..20ae72b3 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ A pre-print for this work is available on [arXiv](https://arxiv.org/abs/2408.118 The benefit of _asynchronous querying_ is that it allows for multiple requests to be sent to an API _without_ having to wait for the LLM's response, which is particularly useful to fully utilise the rate limits of an API. This is especially useful when an experiment file contains a large number of prompts and/or has several models to query. 
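As a rough illustration of this concurrency (a minimal sketch with a placeholder `mock_query` coroutine, not prompto's own code), ten requests sent with `asyncio` finish in roughly the time of the slowest single response rather than the sum of all of them:

```python
import asyncio
import random


async def mock_query(prompt: str) -> str:
    # stand-in for an API call: pretend the model takes 1-2 seconds to reply
    await asyncio.sleep(random.uniform(1, 2))
    return f"response to: {prompt}"


async def main() -> list[str]:
    prompts = [f"prompt {i}" for i in range(10)]
    # schedule every query up front and wait for them together,
    # rather than awaiting each response before sending the next
    return await asyncio.gather(*(mock_query(p) for p in prompts))


if __name__ == "__main__":
    print(asyncio.run(main()))
```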
[_Asynchronous programming_](https://docs.python.org/3/library/asyncio.html) is simply a way for programs to avoid getting stuck on long tasks (like waiting for an LLM response from an API) and instead keep running other things at the same time (to send other queries). -With `prompto`, you are able to define your experiments of LLMs in a jsonl file where each line contains the prompt and any parameters to be used for a query of a model from a specific API. The library will process the experiment file and query models and store results. You are also able to query _multiple_ models from _different_ APIs in a single experiment file and `prompto` will take care of querying the models _asynchronously_ and in _parallel_. +With `prompto`, you are able to define your experiments of LLMs in a jsonl or csv file where each line/row contains the prompt and any parameters to be used for a query of a model from a specific API. The library will process the experiment file and query models and store results. You are also able to query _multiple_ models from _different_ APIs in a single experiment file and `prompto` will take care of querying the models _asynchronously_ and in _parallel_. The library is designed to be extensible and can be used to query different models. diff --git a/docs/experiment_file.md b/docs/experiment_file.md index 77b7d364..dce8d144 100644 --- a/docs/experiment_file.md +++ b/docs/experiment_file.md @@ -2,6 +2,8 @@ An experiment file is a [JSON Lines (jsonl)](https://jsonlines.org/) file that contains the prompts for the experiments along with any other parameters or metadata that is required for the prompt. Each line in the jsonl file is a valid JSON value which defines a particular input to the LLM which we will obtain a response for. We often refer to a single line in the jsonl file as a "`prompt_dict`" (prompt dictionary). +From `prompto` version 0.2.0 onwards, it's also possible to use `csv` files as input to the pipeline. See the [CSV input section](#csv-input) for more details. + For all models/APIs, we require the following keys in the `prompt_dict`: * `prompt`: the prompt for the model @@ -15,6 +17,9 @@ For all models/APIs, we require the following keys in the `prompt_dict`: In addition, there are other optional keys that can be included in the `prompt_dict`: +* `id`: a unique identifier for the prompt + * This is a string that can be used to uniquely identify the prompt. This is useful when you want to track the responses to the prompts and match them back to the original prompts + * This is not strictly required, but is often useful to have * `parameters`: the parameter settings / generation config for the query (given as a dictionary) * This is a dictionary that contains the parameters for the query. The parameters are specific to the model and the API being used. For example, for the Gemini API (`"api": "gemini"`), some parameters to configure are {`temperature`, `max_output_tokens`, `top_p`, `top_k`} etc. which are used to control the generation of the response. For the OpenAI API (`"api": "openai"`), some of these parameters are named differently for instance the maximum output tokens is set using the `max_tokens` parameter and `top_k` is not available to set. For Ollama (`"api": "ollama"`), the parameters are different again, e.g. 
the maximum number of tokens to predict is set using `num_predict` * See the API documentation for the specific API for the list of parameters that can be set and their default values @@ -23,3 +28,18 @@ In addition, there are other optional keys that can be included in the `prompt_d * Note that you can use parallel processing without using the "group" key, but using this key allows you to have full control in order group the prompts in a way that makes sense for your use case. See the [specifying rate limits documentation](rate_limits.md) for more details on parallel processing Lastly, there are other optional keys that are only available for certain APIs/models. For example, for the Gemini API, you can have a `multimedia` key which is a list of dictionaries defining the multimedia files (e.g. images/videos) to be used in the prompt to a multimodal LLM. For these, see the documentation for the specific API/model for more details. + +## CSV input + +For using CSV inputs, the `prompt_dict`s are defined as rows in the CSV file. The CSV file should have a header row with the keys corresponding to the keys above with the exception of the `parameters` key. The parameters (the keys in the dictionary) should have their own columns in the CSV file _prepended with a "parameters-" prefix_. For example, if you have a parameter `temperature` in the `parameters` dictionary, you should have a column named `parameters-temperature` in the CSV file. The values for the parameters should be in the corresponding columns. + +For example, the two jsonl and csv file inputs are equivalent: + +```json +{"id": "id-0", "prompt": "What is the capital of France?", "api": "openai", "model_name": "gpt-3.5-turbo", "parameters": {"temperature": 0.5, "max_tokens": 100}} +``` + +```csv +id,prompt,api,model_name,parameters-temperature,parameters-max_tokens +id-0,What is the capital of France?,openai,gpt-3.5-turbo,0.5,100 +``` diff --git a/pyproject.toml b/pyproject.toml index a652d301..8e380dab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "prompto" -version = "0.1.3" +version = "0.2.0" description = "Library for asynchronous querying of LLM API endpoints and logging progress" authors = [ "rchan ",
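
As a closing illustration of the `parameters-` column convention documented in `docs/experiment_file.md` above, here is a minimal sketch (assuming only pandas is installed; this is not the library's exact code, which lives in `Experiment._read_input_file` and also keeps the original `parameters-` columns alongside the nested dictionary) of how prefixed CSV columns collapse into a nested `parameters` dictionary:

```python
import io

import pandas as pd

# the same csv example as in docs/experiment_file.md
csv_text = (
    "id,prompt,api,model_name,parameters-temperature,parameters-max_tokens\n"
    "id-0,What is the capital of France?,openai,gpt-3.5-turbo,0.5,100\n"
)

df = pd.read_csv(io.StringIO(csv_text))
param_cols = [col for col in df.columns if col.startswith("parameters-")]

records = []
for _, row in df.iterrows():
    # keep the non-parameter columns as-is
    record = {col: row[col] for col in df.columns if col not in param_cols}
    # collapse the "parameters-" columns into one nested dictionary,
    # skipping any values left blank in the csv (read in as NaN)
    record["parameters"] = {
        col.removeprefix("parameters-"): row[col]
        for col in param_cols
        if not pd.isna(row[col])
    }
    records.append(record)

# each record now looks like a jsonl prompt_dict with a nested "parameters" key,
# e.g. {"temperature": 0.5, "max_tokens": 100} for the row above
print(records)
```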