From 680becf76a72ba2ee9690be999b631ffcc414761 Mon Sep 17 00:00:00 2001 From: rchan Date: Mon, 23 Sep 2024 17:21:40 +0100 Subject: [PATCH 1/7] allow input csv and output csv in experiment run --- examples/notebooks/running_experiments.ipynb | 14 +- pyproject.toml | 5 +- src/prompto/experiment.py | 133 +++++++++++++++---- src/prompto/experiment_pipeline.py | 4 +- src/prompto/scripts/run_experiment.py | 18 ++- src/prompto/utils.py | 12 +- tests/core/test_experiment.py | 6 +- tests/core/test_experiment_process.py | 20 +-- tests/core/test_experiment_query_model.py | 28 ++-- tests/core/test_experiment_send_requests.py | 16 +-- tests/core/test_utils.py | 14 +- tests/scripts/test_run_experiment.py | 2 +- 12 files changed, 188 insertions(+), 84 deletions(-) diff --git a/examples/notebooks/running_experiments.ipynb b/examples/notebooks/running_experiments.ipynb index 6e6f9846..fac1a827 100644 --- a/examples/notebooks/running_experiments.ipynb +++ b/examples/notebooks/running_experiments.ipynb @@ -347,7 +347,7 @@ "- `creation_time`: the time the experiment file was created\n", "- `log_file`: the path to the log file for the experiment, e.g. `data_folder/output_folder/experiment_name/{creation_time}_experiment_name.log`\n", "- `input_file_path`: the path to the input JSONL file, e.g. `data_folder/input_folder/experiment_name.jsonl`\n", - "- `output_completed_file_path`: the path to the completed output JSONL file, e.g. `data_folder/output_folder/experiment_name/completed-experiment_name.jsonl`\n", + "- `output_completed_jsonl_file_path`: the path to the completed output JSONL file, e.g. `data_folder/output_folder/experiment_name/completed-experiment_name.jsonl`\n", "- `output_input_file_path`: the path to the input output JSONL file, e.g. `data_folder/output_folder/experiment_name/input-experiment_name.jsonl` (this is just for logging to know what the input to the experiment was)\n", "\n", "Essentially, when initialising an `Experiment` object, we construct all the paths that are relevant to that particular experiment such as the log file, the input file path, and the file paths for storing the final output for the experiment. 
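
A brief usage sketch of the attribute rename above (illustrative only, not part of the patch; it assumes a `data/input/experiment_name.jsonl` file exists, and the `Settings` arguments mirror those used in the updated tests):

    from prompto.experiment import Experiment
    from prompto.settings import Settings

    settings = Settings(data_folder="data", max_queries=50, max_attempts=5)
    experiment = Experiment("experiment_name.jsonl", settings=settings)

    # the completed-output and logged-input paths now carry an explicit "jsonl"
    experiment.output_completed_jsonl_file_path
    # -> data/output/experiment_name/{start_time}-completed-experiment_name.jsonl
    experiment.output_input_jsonl_file_out_path
    # -> data/output/experiment_name/{start_time}-input-experiment_name.jsonl
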
\n", @@ -510,8 +510,8 @@ "experiment.file_name: test.jsonl\n", "experiment.input_file_path: data2/input/test.jsonl\n", "experiment.output_folder: data2/output/test\n", - "experiment.output_input_file_out_path: data2/output/test/25-06-2024-19-14-47-input-test.jsonl\n", - "experiment.output_completed_file_path: data2/output/test/25-06-2024-19-14-47-completed-test.jsonl\n", + "experiment.output_input_jsonl_file_out_path: data2/output/test/25-06-2024-19-14-47-input-test.jsonl\n", + "experiment.output_completed_jsonl_file_path: data2/output/test/25-06-2024-19-14-47-completed-test.jsonl\n", "experiment.log_file: data2/output/test/25-06-2024-19-14-47-log-test.txt\n" ] } @@ -520,8 +520,12 @@ "print(f\"experiment.file_name: {experiment.file_name}\")\n", "print(f\"experiment.input_file_path: {experiment.input_file_path}\")\n", "print(f\"experiment.output_folder: {experiment.output_folder}\")\n", - "print(f\"experiment.output_input_file_out_path: {experiment.output_input_file_out_path}\")\n", - "print(f\"experiment.output_completed_file_path: {experiment.output_completed_file_path}\")\n", + "print(\n", + " f\"experiment.output_input_jsonl_file_out_path: {experiment.output_input_jsonl_file_out_path}\"\n", + ")\n", + "print(\n", + " f\"experiment.output_completed_jsonl_file_path: {experiment.output_completed_jsonl_file_path}\"\n", + ")\n", "print(f\"experiment.log_file: {experiment.log_file}\")" ] }, diff --git a/pyproject.toml b/pyproject.toml index 0a4362c5..84a34151 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ readme = "README.md" python = ">=3.11,<4.0" tqdm = "^4.66.4" python-dotenv = "^1.0.1" +pandas = "^2.2.3" black = { version = "^24.3.0", optional = true } isort = { version = "^5.13.2", optional = true } pre-commit = { version = "^3.7.0", optional = true } @@ -38,8 +39,8 @@ pillow = { version = "^10.4.0", optional = true } ollama = { version = "^0.3.1", optional = true } huggingface-hub = { version = "^0.23.4", optional = true } quart = { version = "^0.19.6", optional = true } -transformers = { version = "^4.41.2", optional = true } -torch = { version = "^2.3.1", optional = true } +transformers = { version = "^4.44.2", optional = true } +torch = { version = "^2.4.1", optional = true } accelerate = { version = "^0.31.0", optional = true } aiohttp = { version = "^3.9.5", optional = true } anthropic = { version = "^0.31.1", optional = true } diff --git a/src/prompto/experiment.py b/src/prompto/experiment.py index 1b91d146..6811b35f 100644 --- a/src/prompto/experiment.py +++ b/src/prompto/experiment.py @@ -5,6 +5,7 @@ import time from datetime import datetime +import pandas as pd from tqdm import tqdm from tqdm.asyncio import tqdm_asyncio @@ -32,7 +33,7 @@ class Experiment: Parameters ---------- file_name : str - The name of the jsonl experiment file + The name of the jsonl or csv experiment file settings : Settings Settings for the pipeline which includes the data folder locations, the maximum number of queries to send per minute, the maximum number @@ -44,8 +45,8 @@ def __init__( file_name: str, settings: Settings, ): - if not file_name.endswith(".jsonl"): - raise ValueError("Experiment file must be a jsonl file") + if not file_name.endswith(".jsonl") and not file_name.endswith(".csv"): + raise ValueError("Experiment file must be a jsonl or csv file") self.file_name: str = file_name # obtain experiment name from file name @@ -70,15 +71,7 @@ def __init__( ) # read in the experiment data - with open(self.input_file_path, "r") as f: - self._experiment_prompts: list[dict] = [ - 
dict(json.loads(line)) for line in f - ] - # sort the prompts by model_name key for the ollama api - # (for avoiding constantly switching and loading models between prompts) - self._experiment_prompts = sort_prompts_by_model_for_api( - self._experiment_prompts, api="ollama" - ) + self._experiment_prompts = self._read_input_file() # set the number of queries self.number_queries: int = len(self._experiment_prompts) @@ -95,13 +88,14 @@ def __init__( self.log_file: str = os.path.join( self.output_folder, f"{self.start_time}-log-{self.experiment_name}.txt" ) - # file path of the completed experiment file in the output experiment folder - self.output_completed_file_path: str = os.path.join( - self.output_folder, f"{self.start_time}-completed-" + self.file_name + # file path of the completed experiment jsonl file in the output experiment folder + self.output_completed_jsonl_file_path: str = os.path.join( + self.output_folder, + f"{self.start_time}-completed-{self.experiment_name}.jsonl", ) - # file path of the input file in the output experiment folder (for logging purposes) - self.output_input_file_out_path: str = os.path.join( - self.output_folder, f"{self.start_time}-input-" + self.file_name + # file path of the input jsonl file in the output experiment folder (for logging purposes) + self.output_input_jsonl_file_out_path: str = os.path.join( + self.output_folder, f"{self.start_time}-input-{self.experiment_name}.jsonl" ) # grouped experiment prompts by @@ -111,9 +105,31 @@ def __init__( # initialise the completed responses self.completed_responses: list[dict] = [] + # initialise the completed response data frame + self._completed_responses_dataframe: pd.DataFrame | None = None + def __str__(self) -> str: return self.file_name + def _read_input_file(self) -> list[dict]: + with open(self.input_file_path, "r") as f: + if self.input_file_path.endswith(".jsonl"): + experiment_prompts: list[dict] = [dict(json.loads(line)) for line in f] + elif self.input_file_path.endswith(".csv"): + experiment_prompts: list[dict] = pd.read_csv(f).to_dict( + orient="records" + ) + else: + raise ValueError("Experiment file must be a jsonl or csv file") + + # sort the prompts by model_name key for the ollama api + # (for avoiding constantly switching and loading models between prompts) + experiment_prompts = sort_prompts_by_model_for_api( + experiment_prompts, api="ollama" + ) + + return experiment_prompts + @property def experiment_prompts(self) -> list[dict]: return self._experiment_prompts @@ -122,6 +138,19 @@ def experiment_prompts(self) -> list[dict]: def experiment_prompts(self, value: list[dict]) -> None: raise AttributeError("Cannot set the experiment_prompts attribute") + @property + def completed_responses_dataframe(self) -> pd.DataFrame: + if self._completed_responses_dataframe is None: + self._completed_responses_dataframe = ( + self._obtain_completed_responses_dataframe() + ) + + return self._completed_responses_dataframe + + @completed_responses_dataframe.setter + def completed_responses_dataframe(self, value: pd.DataFrame) -> None: + raise AttributeError("Cannot set the completed_responses_dataframe attribute") + @property def grouped_experiment_prompts(self) -> dict[str, list[dict]]: # if settings.parallel is False, then we won't utilise the grouping @@ -298,14 +327,41 @@ async def process( # create the output folder for the experiment create_folder(self.output_folder) - # move the experiment file to the output folder + # if the experiment file is csv file, we create a jsonl file which will get moved + 
if self.input_file_path.endswith(".csv"): + # move the input experiment csv file to the output folder + output_input_csv_file_out_path = ( + self.output_input_jsonl_file_out_path.replace(".jsonl", ".csv") + ) + logging.info( + f"Moving {self.input_file_path} to {self.output_folder} as " + f"{output_input_csv_file_out_path}..." + ) + move_file( + source=self.input_file_path, + destination=output_input_csv_file_out_path, + ) + + # create an input experiment jsonl file for the experiment + logging.info( + f"Converting {self.input_file_path} to jsonl file for processing..." + ) + input_file_path_as_jsonl = self.input_file_path.replace(".csv", ".jsonl") + with open(input_file_path_as_jsonl, "w") as f: + for prompt_dict in self.experiment_prompts: + json.dump(prompt_dict, f) + f.write("\n") + else: + input_file_path_as_jsonl = self.input_file_path + + # move the input experiment jsonl file to the output folder logging.info( - f"Moving {self.input_file_path} to {self.output_folder} as " - f"{self.output_input_file_out_path}..." + f"Moving {input_file_path_as_jsonl} to {self.output_folder} as " + f"{self.output_input_jsonl_file_out_path}..." ) move_file( - source=self.input_file_path, - destination=self.output_input_file_out_path, + source=input_file_path_as_jsonl, + destination=self.output_input_jsonl_file_out_path, ) # run the experiment asynchronously @@ -347,7 +403,7 @@ async def process( avg_query_processing_time = processing_time / self.number_queries # read the output file - with open(self.output_completed_file_path, "r") as f: + with open(self.output_completed_jsonl_file_path, "r") as f: self.completed_responses: list[dict] = [ dict(json.loads(line)) for line in f ] @@ -633,7 +689,7 @@ async def query_model_and_record_response( # record the response in a jsonl file asynchronously using FILE_WRITE_LOCK async with FILE_WRITE_LOCK: - with open(self.output_completed_file_path, "a") as f: + with open(self.output_completed_jsonl_file_path, "a") as f: json.dump(completed_prompt_dict, f) f.write("\n") @@ -720,3 +776,30 @@ async def evaluate_responses( prompt_dict = func(prompt_dict) return prompt_dict + + def _obtain_completed_responses_dataframe(self) -> pd.DataFrame: + if self.completed_responses == []: + raise ValueError( + "No completed responses to convert to a DataFrame " + "(completed_responses attribute is empty). " + "Run the process method to obtain the completed responses" + ) + + return pd.DataFrame.from_records(self.completed_responses) + + def save_completed_responses_to_csv(self, filename: str = None) -> None: + """ + Save the completed responses to a csv file. + + Parameters + ---------- + filename : str | None + The name of the csv file to save the completed responses to. 
+ If None, the filename will be the experiment name with the + timestamp of when the experiment started to run, by default None + """ + if filename is None: + filename = self.output_completed_jsonl_file_path.replace(".jsonl", ".csv") + + logging.info(f"Saving completed responses (as csv) to {filename}...") + self.completed_responses_dataframe.to_csv(filename, index=False) diff --git a/src/prompto/experiment_pipeline.py b/src/prompto/experiment_pipeline.py index 0b09a534..1d079181 100644 --- a/src/prompto/experiment_pipeline.py +++ b/src/prompto/experiment_pipeline.py @@ -6,7 +6,7 @@ from prompto.settings import Settings from prompto.utils import ( create_folder, - sort_jsonl_files_by_creation_time, + sort_input_files_by_creation_time, write_log_message, ) @@ -77,7 +77,7 @@ def update_experiment_files(self) -> None: Function to update the list of experiment files by sorting the files by creation/change time (using `os.path.getctime`). """ - self.experiment_files = sort_jsonl_files_by_creation_time( + self.experiment_files = sort_input_files_by_creation_time( input_folder=self.settings.input_folder ) diff --git a/src/prompto/scripts/run_experiment.py b/src/prompto/scripts/run_experiment.py index 9d3ec4d0..84c2cb91 100644 --- a/src/prompto/scripts/run_experiment.py +++ b/src/prompto/scripts/run_experiment.py @@ -174,9 +174,9 @@ def parse_file_path_and_check_in_input( if not os.path.exists(file_path): raise FileNotFoundError(f"File {file_path} not found") - # check if file is a jsonl file - if not file_path.endswith(".jsonl"): - raise ValueError("Experiment file must be a jsonl file") + # check if file is a jsonl or csv file + if not file_path.endswith(".jsonl") and not file_path.endswith(".csv"): + raise ValueError("Experiment file must be a jsonl or csv file") # get experiment file name (without the path) experiment_file_name = os.path.basename(file_path) @@ -397,6 +397,12 @@ async def main(): type=str, default=None, ) + parser.add_argument( + "--output-as-csv", + help="Output the results as a csv file", + action="store_true", + default=False, + ) args = parser.parse_args() # initialise logging @@ -450,6 +456,9 @@ async def main(): logging.info(f"Starting processing experiment: {args.file}...") await experiment.process(evaluation_funcs=scoring_functions) + if args.output_as_csv: + experiment.save_completed_responses_to_csv() + # create judge experiment judge_experiment = create_judge_experiment( create_judge_file=create_judge_file, @@ -466,6 +475,9 @@ async def main(): ) await judge_experiment.process() + if args.output_as_csv: + judge_experiment.save_completed_responses_to_csv() + logging.info("Experiment processed successfully!") diff --git a/src/prompto/utils.py b/src/prompto/utils.py index 74553fd9..6b7e50e6 100644 --- a/src/prompto/utils.py +++ b/src/prompto/utils.py @@ -7,9 +7,9 @@ FILE_WRITE_LOCK = asyncio.Lock() -def sort_jsonl_files_by_creation_time(input_folder: str) -> list[str]: +def sort_input_files_by_creation_time(input_folder: str) -> list[str]: """ - Function sorts the jsonl files in the input folder by creation/change + Function sorts the jsonl or csv files in the input folder by creation/change time in a given directory. Parameters @@ -20,7 +20,7 @@ def sort_jsonl_files_by_creation_time(input_folder: str) -> list[str]: Returns ------- list[str] - Ordered list of jsonl filenames in the input folder. + Ordered list of jsonl or csv filenames in the input folder. 
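
A quick illustration of the widened filter (a sketch only; the folder contents below are hypothetical and not taken from the repository):

    from prompto.utils import sort_input_files_by_creation_time

    # suppose data/input contains, in creation order:
    #   first.jsonl, second.csv, third.jsonl
    sort_input_files_by_creation_time(input_folder="data/input")
    # previously only the .jsonl files were returned; now:
    # ['first.jsonl', 'second.csv', 'third.jsonl']
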
""" if not os.path.isdir(input_folder): raise ValueError( @@ -28,7 +28,11 @@ def sort_jsonl_files_by_creation_time(input_folder: str) -> list[str]: ) return sorted( - [f for f in os.listdir(input_folder) if f.endswith(".jsonl")], + [ + f + for f in os.listdir(input_folder) + if (f.endswith(".jsonl") or f.endswith(".csv")) + ], key=lambda f: os.path.getctime(os.path.join(input_folder, f)), ) diff --git a/tests/core/test_experiment.py b/tests/core/test_experiment.py index 66ad8e54..4604d3a6 100644 --- a/tests/core/test_experiment.py +++ b/tests/core/test_experiment.py @@ -20,7 +20,7 @@ def test_experiment_init_errors(temporary_data_folders): Experiment(settings=Settings()) # passing in a filename that is not a .jsonl file should raise a ValueError - with pytest.raises(ValueError, match="Experiment file must be a jsonl file"): + with pytest.raises(ValueError, match="Experiment file must be a jsonl or csv file"): Experiment("test.txt", settings=Settings()) # passing in a filename that is not in settings.input_folder should raise a FileNotFoundError @@ -56,11 +56,11 @@ def test_experiment_init(temporary_data_folders): assert isinstance(experiment.creation_time, str) assert isinstance(experiment.start_time, str) assert ( - experiment.output_completed_file_path + experiment.output_completed_jsonl_file_path == f"data/output/test_in_input/{experiment.start_time}-completed-test_in_input.jsonl" ) assert ( - experiment.output_input_file_out_path + experiment.output_input_jsonl_file_out_path == f"data/output/test_in_input/{experiment.start_time}-input-test_in_input.jsonl" ) assert experiment._experiment_prompts == [ diff --git a/tests/core/test_experiment_process.py b/tests/core/test_experiment_process.py index 0641b6de..90708bdb 100644 --- a/tests/core/test_experiment_process.py +++ b/tests/core/test_experiment_process.py @@ -65,8 +65,8 @@ async def test_process( assert experiment.completed_responses == result # check that the response is saved to the output file - assert os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: responses = [dict(json.loads(line)) for line in f] assert responses == result @@ -194,8 +194,8 @@ async def test_process_with_max_queries_dict( assert experiment.completed_responses == result # check that the response is saved to the output file - assert os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: responses = [dict(json.loads(line)) for line in f] assert responses == result @@ -337,8 +337,8 @@ async def test_process_with_groups( assert experiment.completed_responses == result # check that the response is saved to the output file - assert os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: responses = [dict(json.loads(line)) for line in f] assert responses == result @@ -466,8 +466,8 @@ async def test_process_with_max_queries_dict_and_groups( assert experiment.completed_responses == result # check that the response is saved to the output file - assert 
os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: responses = [dict(json.loads(line)) for line in f] assert responses == result @@ -730,8 +730,8 @@ async def test_process_with_evaluation( assert experiment.completed_responses == result # check that the response is saved to the output file - assert os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: responses = [dict(json.loads(line)) for line in f] assert responses == result diff --git a/tests/core/test_experiment_query_model.py b/tests/core/test_experiment_query_model.py index 328deab9..f39c2f68 100644 --- a/tests/core/test_experiment_query_model.py +++ b/tests/core/test_experiment_query_model.py @@ -53,8 +53,8 @@ async def test_query_model_and_record_response( assert log_msg in caplog.text # check that the response is saved to the output file - assert os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: responses = [dict(json.loads(line)) for line in f] assert len(responses) == 1 @@ -158,8 +158,8 @@ async def test_query_model_and_record_response_not_implemented_error( ) # check that the response is saved to the output file - assert os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: responses = [dict(json.loads(line)) for line in f] assert len(responses) == 1 @@ -213,8 +213,8 @@ async def test_query_model_and_record_response_key_error( assert result["response"] == "KeyError - 'some key error'" # check that the response is saved to the output file - assert os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: responses = [dict(json.loads(line)) for line in f] assert len(responses) == 1 @@ -264,8 +264,8 @@ async def test_query_model_and_record_response_value_error( assert result["response"] == "ValueError - some value error" # check that the response is saved to the output file - assert os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: responses = [dict(json.loads(line)) for line in f] assert len(responses) == 1 @@ -315,8 +315,8 @@ async def test_query_model_and_record_response_type_error( assert result["response"] == "TypeError - some type error" # check that the response is saved to the output file - assert os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as 
f: responses = [dict(json.loads(line)) for line in f] assert len(responses) == 1 @@ -366,8 +366,8 @@ async def test_query_model_and_record_response_file_not_found_error( assert result["response"] == "FileNotFoundError - some type error" # check that the response is saved to the output file - assert os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: responses = [dict(json.loads(line)) for line in f] assert len(responses) == 1 @@ -460,8 +460,8 @@ async def test_query_model_and_record_response_exception_error_max( ) # check that the response is saved to the output file - assert os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: responses = [dict(json.loads(line)) for line in f] assert len(responses) == 1 diff --git a/tests/core/test_experiment_send_requests.py b/tests/core/test_experiment_send_requests.py index 0693b0f5..6bd70df5 100644 --- a/tests/core/test_experiment_send_requests.py +++ b/tests/core/test_experiment_send_requests.py @@ -345,8 +345,8 @@ async def test_send_requests_retry( ) # check that the response is saved to the output file - assert os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: responses = [dict(json.loads(line)) for line in f] assert len(responses) == len(PROMPT_DICTS_TO_TEST) @@ -414,8 +414,8 @@ async def test_send_requests_retry_no_retries( ) # check that the response is saved to the output file - assert os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: responses = [dict(json.loads(line)) for line in f] assert len(responses) == len(PROMPT_DICTS_TO_TEST) - 1 @@ -466,8 +466,8 @@ async def test_send_requests_retry_with_group( ) # check that the response is saved to the output file - assert os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: responses = [dict(json.loads(line)) for line in f] assert len(responses) == len(PROMPT_DICTS_TO_TEST) @@ -547,8 +547,8 @@ async def test_send_requests_retry_no_retries_group( ) # check that the response is saved to the output file - assert os.path.exists(experiment.output_completed_file_path) - with open(experiment.output_completed_file_path, "r") as f: + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: responses = [dict(json.loads(line)) for line in f] assert len(responses) == len(PROMPT_DICTS_TO_TEST) - 1 diff --git a/tests/core/test_utils.py b/tests/core/test_utils.py index 8f9f4ec8..7c554c52 100644 --- a/tests/core/test_utils.py +++ b/tests/core/test_utils.py @@ -19,29 +19,29 @@ log_success_response_query, move_file, parse_list_arg, - 
sort_jsonl_files_by_creation_time, + sort_input_files_by_creation_time, sort_prompts_by_model_for_api, write_log_message, ) -def test_sort_jsonl_files_by_creation_time(temporary_data_folders, caplog): +def test_sort_input_files_by_creation_time(temporary_data_folders, caplog): caplog.set_level(logging.INFO) # raise error if no input folder is passed with pytest.raises(TypeError, match="missing 1 required positional argument"): - sort_jsonl_files_by_creation_time() + sort_input_files_by_creation_time() # raise error if not a path with pytest.raises( ValueError, match="Input folder 'not_a_folder' must be a valid path to a folder" ): - sort_jsonl_files_by_creation_time(input_folder="not_a_folder") + sort_input_files_by_creation_time(input_folder="not_a_folder") # raise error if not a folder with pytest.raises( ValueError, match="Input folder 'test.txt' must be a valid path to a folder" ): - sort_jsonl_files_by_creation_time(input_folder="test.txt") + sort_input_files_by_creation_time(input_folder="test.txt") # sort the jsonl files in the utils folder by creation time logging.info( @@ -54,11 +54,11 @@ def test_sort_jsonl_files_by_creation_time(temporary_data_folders, caplog): if f.endswith(".jsonl") } ) - sorted_files = sort_jsonl_files_by_creation_time(input_folder="utils") + sorted_files = sort_input_files_by_creation_time(input_folder="utils") assert sorted_files == ["first.jsonl", "second.jsonl", "third.jsonl"] # sort empty folder should return empty list - empty_folder = sort_jsonl_files_by_creation_time(input_folder="data") + empty_folder = sort_input_files_by_creation_time(input_folder="data") assert empty_folder == [] diff --git a/tests/scripts/test_run_experiment.py b/tests/scripts/test_run_experiment.py index a93ff213..267348a5 100644 --- a/tests/scripts/test_run_experiment.py +++ b/tests/scripts/test_run_experiment.py @@ -151,7 +151,7 @@ def test_parse_file_path_and_check_in_input_error(temporary_data_folder_judge): parse_file_path_and_check_in_input("unknown.json", "test") # raise error if file is not jsonl file path - with pytest.raises(ValueError, match="Experiment file must be a jsonl file"): + with pytest.raises(ValueError, match="Experiment file must be a jsonl or csv file"): parse_file_path_and_check_in_input("max_queries_dict.json", "test") From 35019cd8b7210a8273044d164744c169a18190ce Mon Sep 17 00:00:00 2001 From: rchan Date: Tue, 24 Sep 2024 09:15:48 +0100 Subject: [PATCH 2/7] update running experiments notebook --- ... 
24-09-2024-09-13-56-completed-test.jsonl} | 0 .../24-09-2024-09-13-56-input-test.jsonl} | 0 .../test/24-09-2024-09-13-56-log-test.txt | 4 + .../test/25-06-2024-19-14-47-input-test.jsonl | 3 - .../test/25-06-2024-19-14-47-log-test.txt | 4 - ...24-09-2024-09-14-36-completed-test2.jsonl} | 6 +- .../24-09-2024-09-14-36-input-test2.jsonl} | 0 .../test2/24-09-2024-09-14-36-log-test2.txt | 2 + .../25-06-2024-19-15-29-input-test2.jsonl | 3 - .../test2/25-06-2024-19-15-29-log-test2.txt | 1 - examples/notebooks/running_experiments.ipynb | 155 +++++++++++++++--- 11 files changed, 138 insertions(+), 40 deletions(-) rename examples/notebooks/data2/output/test/{25-06-2024-19-14-47-completed-test.jsonl => 24-09-2024-09-13-56-completed-test.jsonl} (100%) mode change 100644 => 100755 rename examples/notebooks/data2/{input/test.jsonl => output/test/24-09-2024-09-13-56-input-test.jsonl} (100%) create mode 100755 examples/notebooks/data2/output/test/24-09-2024-09-13-56-log-test.txt delete mode 100644 examples/notebooks/data2/output/test/25-06-2024-19-14-47-input-test.jsonl delete mode 100644 examples/notebooks/data2/output/test/25-06-2024-19-14-47-log-test.txt rename examples/notebooks/data2/output/test2/{25-06-2024-19-15-29-completed-test2.jsonl => 24-09-2024-09-14-36-completed-test2.jsonl} (57%) rename examples/notebooks/data2/{input/test2.jsonl => output/test2/24-09-2024-09-14-36-input-test2.jsonl} (100%) create mode 100644 examples/notebooks/data2/output/test2/24-09-2024-09-14-36-log-test2.txt delete mode 100644 examples/notebooks/data2/output/test2/25-06-2024-19-15-29-input-test2.jsonl delete mode 100644 examples/notebooks/data2/output/test2/25-06-2024-19-15-29-log-test2.txt diff --git a/examples/notebooks/data2/output/test/25-06-2024-19-14-47-completed-test.jsonl b/examples/notebooks/data2/output/test/24-09-2024-09-13-56-completed-test.jsonl old mode 100644 new mode 100755 similarity index 100% rename from examples/notebooks/data2/output/test/25-06-2024-19-14-47-completed-test.jsonl rename to examples/notebooks/data2/output/test/24-09-2024-09-13-56-completed-test.jsonl diff --git a/examples/notebooks/data2/input/test.jsonl b/examples/notebooks/data2/output/test/24-09-2024-09-13-56-input-test.jsonl similarity index 100% rename from examples/notebooks/data2/input/test.jsonl rename to examples/notebooks/data2/output/test/24-09-2024-09-13-56-input-test.jsonl diff --git a/examples/notebooks/data2/output/test/24-09-2024-09-13-56-log-test.txt b/examples/notebooks/data2/output/test/24-09-2024-09-13-56-log-test.txt new file mode 100755 index 00000000..48657632 --- /dev/null +++ b/examples/notebooks/data2/output/test/24-09-2024-09-13-56-log-test.txt @@ -0,0 +1,4 @@ +24-09-2024, 09:14: Error (i=1, id=9): NotImplementedError - API unknown-api not recognised or implemented +24-09-2024, 09:14: Error (i=2, id=10): NotImplementedError - API unknown-api not recognised or implemented +24-09-2024, 09:14: Error (i=3, id=11): NotImplementedError - API unknown-api not recognised or implemented +24-09-2024, 09:14: Completed experiment: test.jsonl! 
Experiment processing time: 3.703 seconds, Average time per query: 1.234 seconds diff --git a/examples/notebooks/data2/output/test/25-06-2024-19-14-47-input-test.jsonl b/examples/notebooks/data2/output/test/25-06-2024-19-14-47-input-test.jsonl deleted file mode 100644 index 6d212df6..00000000 --- a/examples/notebooks/data2/output/test/25-06-2024-19-14-47-input-test.jsonl +++ /dev/null @@ -1,3 +0,0 @@ -{"id": 9, "prompt": ["Hello", "My name is Bob and I'm 6 years old", "How old am I next year?"], "api": "unknown-api", "model_name": "unknown-model-name", "parameters": {"candidate_count": 1, "max_output_tokens": 64, "temperature": 1, "top_k": 40}} -{"id": 10, "prompt": ["Can you give me a random number between 1-10?", "What is +5 of that number?", "What is half of that number?"], "api": "unknown-api", "model_name": "unknown-model-name", "parameters": {"candidate_count": 1, "max_output_tokens": 128, "temperature": 0.5, "top_k": 40}} -{"id": 11, "prompt": "How many theaters are there in London's South End?", "api": "unknown-api", "model_name": "unknown-model-name"} diff --git a/examples/notebooks/data2/output/test/25-06-2024-19-14-47-log-test.txt b/examples/notebooks/data2/output/test/25-06-2024-19-14-47-log-test.txt deleted file mode 100644 index 800e796f..00000000 --- a/examples/notebooks/data2/output/test/25-06-2024-19-14-47-log-test.txt +++ /dev/null @@ -1,4 +0,0 @@ -25-06-2024, 19:18: Error (i=1) [id=9]. NotImplementedError - API unknown-api not recognised or implemented -25-06-2024, 19:18: Error (i=2) [id=10]. NotImplementedError - API unknown-api not recognised or implemented -25-06-2024, 19:18: Error (i=3) [id=11]. NotImplementedError - API unknown-api not recognised or implemented -25-06-2024, 19:18: Completed experiment test.jsonl! Experiment processing time: 3.713 seconds, Average time per query: 1.238 seconds diff --git a/examples/notebooks/data2/output/test2/25-06-2024-19-15-29-completed-test2.jsonl b/examples/notebooks/data2/output/test2/24-09-2024-09-14-36-completed-test2.jsonl similarity index 57% rename from examples/notebooks/data2/output/test2/25-06-2024-19-15-29-completed-test2.jsonl rename to examples/notebooks/data2/output/test2/24-09-2024-09-14-36-completed-test2.jsonl index 780c6060..584035ef 100644 --- a/examples/notebooks/data2/output/test2/25-06-2024-19-15-29-completed-test2.jsonl +++ b/examples/notebooks/data2/output/test2/24-09-2024-09-14-36-completed-test2.jsonl @@ -1,3 +1,3 @@ -{"id": 9, "prompt": ["Hello", "My name is Bob and I'm 6 years old", "How old am I next year?"], "api": "test", "model_name": "test", "parameters": {"candidate_count": 1, "max_output_tokens": 64, "temperature": 1, "top_k": 40}, "response": "This is a test response"} -{"id": 10, "prompt": ["Can you give me a random number between 1-10?", "What is +5 of that number?", "What is half of that number?"], "api": "test", "model_name": "test", "parameters": {"candidate_count": 1, "max_output_tokens": 128, "temperature": 0.5, "top_k": 40}, "response": "This is a test response"} -{"id": 11, "prompt": "How many theaters are there in London's South End?", "api": "test", "model_name": "test", "response": "This is a test response"} +{"id": 9, "prompt": ["Hello", "My name is Bob and I'm 6 years old", "How old am I next year?"], "api": "test", "model_name": "test", "parameters": {"candidate_count": 1, "max_output_tokens": 64, "temperature": 1, "top_k": 40}, "timestamp_sent": "24-09-2024-09-14-39", "response": "This is a test response"} +{"id": 10, "prompt": ["Can you give me a random number between 1-10?", 
"What is +5 of that number?", "What is half of that number?"], "api": "test", "model_name": "test", "parameters": {"candidate_count": 1, "max_output_tokens": 128, "temperature": 0.5, "top_k": 40}, "timestamp_sent": "24-09-2024-09-14-40", "response": "This is a test response"} +{"id": 11, "prompt": "How many theaters are there in London's South End?", "api": "test", "model_name": "test", "timestamp_sent": "24-09-2024-09-14-41", "response": "ValueError - This is a test error which we should handle and return"} diff --git a/examples/notebooks/data2/input/test2.jsonl b/examples/notebooks/data2/output/test2/24-09-2024-09-14-36-input-test2.jsonl similarity index 100% rename from examples/notebooks/data2/input/test2.jsonl rename to examples/notebooks/data2/output/test2/24-09-2024-09-14-36-input-test2.jsonl diff --git a/examples/notebooks/data2/output/test2/24-09-2024-09-14-36-log-test2.txt b/examples/notebooks/data2/output/test2/24-09-2024-09-14-36-log-test2.txt new file mode 100644 index 00000000..bb44e5ce --- /dev/null +++ b/examples/notebooks/data2/output/test2/24-09-2024-09-14-36-log-test2.txt @@ -0,0 +1,2 @@ +24-09-2024, 09:14: Error (i=3, id=11): ValueError - This is a test error which we should handle and return +24-09-2024, 09:14: Completed experiment: test2.jsonl! Experiment processing time: 3.615 seconds, Average time per query: 1.205 seconds diff --git a/examples/notebooks/data2/output/test2/25-06-2024-19-15-29-input-test2.jsonl b/examples/notebooks/data2/output/test2/25-06-2024-19-15-29-input-test2.jsonl deleted file mode 100644 index 1233f43d..00000000 --- a/examples/notebooks/data2/output/test2/25-06-2024-19-15-29-input-test2.jsonl +++ /dev/null @@ -1,3 +0,0 @@ -{"id": 9, "prompt": ["Hello", "My name is Bob and I'm 6 years old", "How old am I next year?"], "api": "test", "model_name": "test", "parameters": {"candidate_count": 1, "max_output_tokens": 64, "temperature": 1, "top_k": 40}} -{"id": 10, "prompt": ["Can you give me a random number between 1-10?", "What is +5 of that number?", "What is half of that number?"], "api": "test", "model_name": "test", "parameters": {"candidate_count": 1, "max_output_tokens": 128, "temperature": 0.5, "top_k": 40}} -{"id": 11, "prompt": "How many theaters are there in London's South End?", "api": "test", "model_name": "test"} diff --git a/examples/notebooks/data2/output/test2/25-06-2024-19-15-29-log-test2.txt b/examples/notebooks/data2/output/test2/25-06-2024-19-15-29-log-test2.txt deleted file mode 100644 index 8ed63d80..00000000 --- a/examples/notebooks/data2/output/test2/25-06-2024-19-15-29-log-test2.txt +++ /dev/null @@ -1 +0,0 @@ -25-06-2024, 19:19: Completed experiment test2.jsonl! 
Experiment processing time: 4.613 seconds, Average time per query: 1.538 seconds diff --git a/examples/notebooks/running_experiments.ipynb b/examples/notebooks/running_experiments.ipynb index fac1a827..aa1715fd 100644 --- a/examples/notebooks/running_experiments.ipynb +++ b/examples/notebooks/running_experiments.ipynb @@ -142,7 +142,7 @@ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mWriteFolderError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[6], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43msettings\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minput_folder\u001b[49m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124munknown_folder/input\u001b[39m\u001b[38;5;124m\"\u001b[39m\n", - "File \u001b[0;32m~/Library/CloudStorage/OneDrive-TheAlanTuringInstitute/prompto/src/prompto/settings.py:216\u001b[0m, in \u001b[0;36mSettings.input_folder\u001b[0;34m(self, value)\u001b[0m\n\u001b[1;32m 214\u001b[0m \u001b[38;5;129m@input_folder\u001b[39m\u001b[38;5;241m.\u001b[39msetter\n\u001b[1;32m 215\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minput_folder\u001b[39m(\u001b[38;5;28mself\u001b[39m, value: \u001b[38;5;28mstr\u001b[39m):\n\u001b[0;32m--> 216\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m WriteFolderError(\n\u001b[1;32m 217\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot set input folder on it\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124ms own. Set the \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata_folder\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m instead\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 218\u001b[0m )\n", + "File \u001b[0;32m~/Library/CloudStorage/OneDrive-TheAlanTuringInstitute/prompto/src/prompto/settings.py:176\u001b[0m, in \u001b[0;36mSettings.input_folder\u001b[0;34m(self, value)\u001b[0m\n\u001b[1;32m 174\u001b[0m \u001b[38;5;129m@input_folder\u001b[39m\u001b[38;5;241m.\u001b[39msetter\n\u001b[1;32m 175\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minput_folder\u001b[39m(\u001b[38;5;28mself\u001b[39m, value: \u001b[38;5;28mstr\u001b[39m):\n\u001b[0;32m--> 176\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m WriteFolderError(\n\u001b[1;32m 177\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot set input folder on it\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124ms own. Set the \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata_folder\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m instead\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 178\u001b[0m )\n", "\u001b[0;31mWriteFolderError\u001b[0m: Cannot set input folder on it's own. 
Set the 'data_folder' instead" ] } @@ -164,7 +164,7 @@ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mWriteFolderError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[7], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43msettings\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moutput_folder\u001b[49m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124munknown_folder/output\u001b[39m\u001b[38;5;124m\"\u001b[39m\n", - "File \u001b[0;32m~/Library/CloudStorage/OneDrive-TheAlanTuringInstitute/prompto/src/prompto/settings.py:228\u001b[0m, in \u001b[0;36mSettings.output_folder\u001b[0;34m(self, value)\u001b[0m\n\u001b[1;32m 226\u001b[0m \u001b[38;5;129m@output_folder\u001b[39m\u001b[38;5;241m.\u001b[39msetter\n\u001b[1;32m 227\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21moutput_folder\u001b[39m(\u001b[38;5;28mself\u001b[39m, value: \u001b[38;5;28mstr\u001b[39m):\n\u001b[0;32m--> 228\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m WriteFolderError(\n\u001b[1;32m 229\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot set output folder on it\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124ms own. Set the \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata_folder\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m instead\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 230\u001b[0m )\n", + "File \u001b[0;32m~/Library/CloudStorage/OneDrive-TheAlanTuringInstitute/prompto/src/prompto/settings.py:188\u001b[0m, in \u001b[0;36mSettings.output_folder\u001b[0;34m(self, value)\u001b[0m\n\u001b[1;32m 186\u001b[0m \u001b[38;5;129m@output_folder\u001b[39m\u001b[38;5;241m.\u001b[39msetter\n\u001b[1;32m 187\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21moutput_folder\u001b[39m(\u001b[38;5;28mself\u001b[39m, value: \u001b[38;5;28mstr\u001b[39m):\n\u001b[0;32m--> 188\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m WriteFolderError(\n\u001b[1;32m 189\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot set output folder on it\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124ms own. Set the \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata_folder\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m instead\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 190\u001b[0m )\n", "\u001b[0;31mWriteFolderError\u001b[0m: Cannot set output folder on it's own. 
Set the 'data_folder' instead" ] } @@ -186,7 +186,7 @@ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mWriteFolderError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[8], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43msettings\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmedia_folder\u001b[49m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124munknown_folder/media\u001b[39m\u001b[38;5;124m\"\u001b[39m\n", - "File \u001b[0;32m~/Library/CloudStorage/OneDrive-TheAlanTuringInstitute/prompto/src/prompto/settings.py:240\u001b[0m, in \u001b[0;36mSettings.media_folder\u001b[0;34m(self, value)\u001b[0m\n\u001b[1;32m 238\u001b[0m \u001b[38;5;129m@media_folder\u001b[39m\u001b[38;5;241m.\u001b[39msetter\n\u001b[1;32m 239\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mmedia_folder\u001b[39m(\u001b[38;5;28mself\u001b[39m, value: \u001b[38;5;28mstr\u001b[39m):\n\u001b[0;32m--> 240\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m WriteFolderError(\n\u001b[1;32m 241\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot set media folder on it\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124ms own. Set the \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata_folder\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m instead\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 242\u001b[0m )\n", + "File \u001b[0;32m~/Library/CloudStorage/OneDrive-TheAlanTuringInstitute/prompto/src/prompto/settings.py:200\u001b[0m, in \u001b[0;36mSettings.media_folder\u001b[0;34m(self, value)\u001b[0m\n\u001b[1;32m 198\u001b[0m \u001b[38;5;129m@media_folder\u001b[39m\u001b[38;5;241m.\u001b[39msetter\n\u001b[1;32m 199\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mmedia_folder\u001b[39m(\u001b[38;5;28mself\u001b[39m, value: \u001b[38;5;28mstr\u001b[39m):\n\u001b[0;32m--> 200\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m WriteFolderError(\n\u001b[1;32m 201\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot set media folder on it\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124ms own. Set the \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata_folder\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m instead\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 202\u001b[0m )\n", "\u001b[0;31mWriteFolderError\u001b[0m: Cannot set media folder on it's own. 
Set the 'data_folder' instead" ] } @@ -251,8 +251,8 @@ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[11], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43msettings\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdata_folder\u001b[49m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124munknown_folder\u001b[39m\u001b[38;5;124m\"\u001b[39m\n", - "File \u001b[0;32m~/Library/CloudStorage/OneDrive-TheAlanTuringInstitute/prompto/src/prompto/settings.py:202\u001b[0m, in \u001b[0;36mSettings.data_folder\u001b[0;34m(self, value)\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[38;5;129m@data_folder\u001b[39m\u001b[38;5;241m.\u001b[39msetter\n\u001b[1;32m 200\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdata_folder\u001b[39m(\u001b[38;5;28mself\u001b[39m, value: \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m 201\u001b[0m \u001b[38;5;66;03m# check the data folder exists\u001b[39;00m\n\u001b[0;32m--> 202\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcheck_folder_exists\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 203\u001b[0m \u001b[38;5;66;03m# set the data folder\u001b[39;00m\n\u001b[1;32m 204\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_data_folder \u001b[38;5;241m=\u001b[39m value\n", - "File \u001b[0;32m~/Library/CloudStorage/OneDrive-TheAlanTuringInstitute/prompto/src/prompto/settings.py:154\u001b[0m, in \u001b[0;36mSettings.check_folder_exists\u001b[0;34m(data_folder)\u001b[0m\n\u001b[1;32m 152\u001b[0m \u001b[38;5;66;03m# check if data folder exists\u001b[39;00m\n\u001b[1;32m 153\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39misdir(data_folder):\n\u001b[0;32m--> 154\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 155\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mData folder \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdata_folder\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m must be a valid path to a folder\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 156\u001b[0m )\n\u001b[1;32m 158\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m\n", + "File \u001b[0;32m~/Library/CloudStorage/OneDrive-TheAlanTuringInstitute/prompto/src/prompto/settings.py:162\u001b[0m, in \u001b[0;36mSettings.data_folder\u001b[0;34m(self, value)\u001b[0m\n\u001b[1;32m 159\u001b[0m \u001b[38;5;129m@data_folder\u001b[39m\u001b[38;5;241m.\u001b[39msetter\n\u001b[1;32m 160\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdata_folder\u001b[39m(\u001b[38;5;28mself\u001b[39m, value: \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m 161\u001b[0m \u001b[38;5;66;03m# check the data folder exists\u001b[39;00m\n\u001b[0;32m--> 162\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcheck_folder_exists\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 163\u001b[0m \u001b[38;5;66;03m# set the data folder\u001b[39;00m\n\u001b[1;32m 164\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_data_folder \u001b[38;5;241m=\u001b[39m value\n", + "File 
\u001b[0;32m~/Library/CloudStorage/OneDrive-TheAlanTuringInstitute/prompto/src/prompto/settings.py:114\u001b[0m, in \u001b[0;36mSettings.check_folder_exists\u001b[0;34m(data_folder)\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[38;5;66;03m# check if data folder exists\u001b[39;00m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39misdir(data_folder):\n\u001b[0;32m--> 114\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 115\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mData folder \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdata_folder\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m must be a valid path to a folder\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 116\u001b[0m )\n\u001b[1;32m 118\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m\n", "\u001b[0;31mValueError\u001b[0m: Data folder 'unknown_folder' must be a valid path to a folder" ] } @@ -348,7 +348,7 @@ "- `log_file`: the path to the log file for the experiment, e.g. `data_folder/output_folder/experiment_name/{creation_time}_experiment_name.log`\n", "- `input_file_path`: the path to the input JSONL file, e.g. `data_folder/input_folder/experiment_name.jsonl`\n", "- `output_completed_jsonl_file_path`: the path to the completed output JSONL file, e.g. `data_folder/output_folder/experiment_name/completed-experiment_name.jsonl`\n", - "- `output_input_file_path`: the path to the input output JSONL file, e.g. `data_folder/output_folder/experiment_name/input-experiment_name.jsonl` (this is just for logging to know what the input to the experiment was)\n", + "- `output_input_jsonl_file_out_path`: the path to the input output JSONL file, e.g. `data_folder/output_folder/experiment_name/input-experiment_name.jsonl` (this is just for logging to know what the input to the experiment was)\n", "\n", "Essentially, when initialising an `Experiment` object, we construct all the paths that are relevant to that particular experiment such as the log file, the input file path, and the file paths for storing the final output for the experiment. 
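
A sketch of the new csv input path that this PR introduces (illustrative; the file name and contents below are hypothetical and mirror the added unit tests rather than this notebook). A csv experiment file is read with pandas into the same list of prompt dictionaries that a jsonl file would produce; during `process()`, the csv is then copied to the output folder and converted to a jsonl file before the usual pipeline runs:

    from prompto.experiment import Experiment
    from prompto.settings import Settings

    # data/input/experiment_name.csv (hypothetical contents):
    #   id,prompt,api,model_name
    #   0,test prompt 0,test,test_model
    #   1,test prompt 1,test,test_model
    experiment = Experiment("experiment_name.csv", settings=Settings(data_folder="data"))
    experiment.experiment_prompts
    # [{'id': 0, 'prompt': 'test prompt 0', 'api': 'test', 'model_name': 'test_model'},
    #  {'id': 1, 'prompt': 'test prompt 1', 'api': 'test', 'model_name': 'test_model'}]
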
\n", "\n", @@ -396,7 +396,7 @@ { "data": { "text/plain": [ - "'25-06-2024-19-14-47'" + "'09-07-2024-11-59-54'" ] }, "execution_count": 17, @@ -510,9 +510,9 @@ "experiment.file_name: test.jsonl\n", "experiment.input_file_path: data2/input/test.jsonl\n", "experiment.output_folder: data2/output/test\n", - "experiment.output_input_jsonl_file_out_path: data2/output/test/25-06-2024-19-14-47-input-test.jsonl\n", - "experiment.output_completed_jsonl_file_path: data2/output/test/25-06-2024-19-14-47-completed-test.jsonl\n", - "experiment.log_file: data2/output/test/25-06-2024-19-14-47-log-test.txt\n" + "experiment.output_input_jsonl_file_out_path: data2/output/test/24-09-2024-09-13-56-input-test.jsonl\n", + "experiment.output_completed_jsonl_file_path: data2/output/test/24-09-2024-09-13-56-completed-test.jsonl\n", + "experiment.log_file: data2/output/test/24-09-2024-09-13-56-log-test.txt\n" ] } ], @@ -591,8 +591,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Sending 3 queries (attempt 1/5): 100%|██████████| 3/3 [00:03<00:00, 1.20s/query]\n", - "Waiting for responses (attempt 1/5): 100%|██████████| 3/3 [00:00<00:00, 514.85query/s]\n" + "Sending 3 queries at 50 QPM with RI of 1.2s (attempt 1/5): 100%|██████████| 3/3 [00:03<00:00, 1.20s/query]\n", + "Waiting for responses (attempt 1/5): 100%|██████████| 3/3 [00:00<00:00, 352.44query/s]\n" ] } ], @@ -677,13 +677,112 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "If we look at the output, we can see we got errors that there were `NotImplementedErrors` as the model was not implemented. To see the models implemented, there is a dictionary of models in the `apis` module called `ASYNC_APIS` where the keys are the API names and the values are the corresponding classes." + "After running the experiment, you can also see the output as a dataframe too:" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " id prompt api \\\n", + "0 9 [Hello, My name is Bob and I'm 6 years old, Ho... unknown-api \n", + "1 10 [Can you give me a random number between 1-10?... unknown-api \n", + "2 11 How many theaters are there in London's South ... unknown-api \n", + "\n", + " model_name parameters \\\n", + "0 unknown-model-name {'candidate_count': 1, 'max_output_tokens': 64... \n", + "1 unknown-model-name {'candidate_count': 1, 'max_output_tokens': 12... \n", + "2 unknown-model-name NaN \n", + "\n", + " response \n", + "0 NotImplementedError - API unknown-api not reco... \n", + "1 NotImplementedError - API unknown-api not reco... \n", + "2 NotImplementedError - API unknown-api not reco... " + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "experiment.completed_responses_dataframe" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If we look at the output, we can see we got errors that there were `NotImplementedErrors` as the model was not implemented. To see the models implemented, there is a dictionary of models in the `apis` module called `ASYNC_APIS` where the keys are the API names and the values are the corresponding classes." + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, "outputs": [ { "data": { @@ -691,6 +790,7 @@ "{'test': prompto.apis.testing.testing_api.TestAPI,\n", " 'azure-openai': prompto.apis.azure_openai.azure_openai.AzureOpenAIAPI,\n", " 'openai': prompto.apis.openai.openai.OpenAIAPI,\n", + " 'anthropic': prompto.apis.anthropic.anthropic.AnthropicAPI,\n", " 'gemini': prompto.apis.gemini.gemini.GeminiAPI,\n", " 'vertexai': prompto.apis.vertexai.vertexai.VertexAIAPI,\n", " 'ollama': prompto.apis.ollama.ollama.OllamaAPI,\n", @@ -698,7 +798,7 @@ " 'quart': prompto.apis.quart.quart.QuartAPI}" ] }, - "execution_count": 27, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -720,7 +820,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ @@ -743,7 +843,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -788,7 +888,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -797,7 +897,7 @@ "['test2.jsonl']" ] }, - "execution_count": 30, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -808,7 +908,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 32, "metadata": {}, "outputs": [ { @@ -840,7 +940,7 @@ " 'model_name': 'test'}]" ] }, - "execution_count": 31, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -852,15 +952,15 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 33, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "Sending 3 queries (attempt 1/5): 100%|██████████| 3/3 [00:03<00:00, 1.20s/query]\n", - "Waiting for responses (attempt 1/5): 100%|██████████| 3/3 [00:01<00:00, 2.99query/s]\n" + "Sending 3 queries at 50 QPM with RI of 1.2s (attempt 1/5): 100%|██████████| 3/3 [00:03<00:00, 1.20s/query]\n", + "Waiting for responses (attempt 1/5): 100%|██████████| 3/3 [00:00<00:00, 830.39query/s]\n" ] } ], @@ -870,7 +970,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 34, "metadata": {}, "outputs": [ { @@ -886,6 +986,7 @@ " 'max_output_tokens': 64,\n", " 'temperature': 
1,\n", " 'top_k': 40},\n", + " 'timestamp_sent': '24-09-2024-09-14-39',\n", " 'response': 'This is a test response'},\n", " {'id': 10,\n", " 'prompt': ['Can you give me a random number between 1-10?',\n", @@ -897,15 +998,17 @@ " 'max_output_tokens': 128,\n", " 'temperature': 0.5,\n", " 'top_k': 40},\n", + " 'timestamp_sent': '24-09-2024-09-14-40',\n", " 'response': 'This is a test response'},\n", " {'id': 11,\n", " 'prompt': \"How many theaters are there in London's South End?\",\n", " 'api': 'test',\n", " 'model_name': 'test',\n", - " 'response': 'This is a test response'}]" + " 'timestamp_sent': '24-09-2024-09-14-41',\n", + " 'response': 'ValueError - This is a test error which we should handle and return'}]" ] }, - "execution_count": 33, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } From 53869a089e215392f30fadc2c3379dc606495bbf Mon Sep 17 00:00:00 2001 From: rchan Date: Tue, 24 Sep 2024 10:31:17 +0100 Subject: [PATCH 3/7] test experiment methods for csv input and output --- pyproject.toml | 2 +- src/prompto/experiment.py | 13 +- tests/conftest.py | 6 + tests/core/test_experiment.py | 327 +++++++++++++++++++++++++++++++++- 4 files changed, 340 insertions(+), 8 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 84a34151..a652d301 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ huggingface-hub = { version = "^0.23.4", optional = true } quart = { version = "^0.19.6", optional = true } transformers = { version = "^4.44.2", optional = true } torch = { version = "^2.4.1", optional = true } -accelerate = { version = "^0.31.0", optional = true } +accelerate = { version = "^0.34.2", optional = true } aiohttp = { version = "^3.9.5", optional = true } anthropic = { version = "^0.31.1", optional = true } diff --git a/src/prompto/experiment.py b/src/prompto/experiment.py index 6811b35f..4a6f23a7 100644 --- a/src/prompto/experiment.py +++ b/src/prompto/experiment.py @@ -71,7 +71,7 @@ def __init__( ) # read in the experiment data - self._experiment_prompts = self._read_input_file() + self._experiment_prompts = self._read_input_file(self.input_file_path) # set the number of queries self.number_queries: int = len(self._experiment_prompts) @@ -111,11 +111,12 @@ def __init__( def __str__(self) -> str: return self.file_name - def _read_input_file(self) -> list[dict]: - with open(self.input_file_path, "r") as f: - if self.input_file_path.endswith(".jsonl"): + @staticmethod + def _read_input_file(input_file_path) -> list[dict]: + with open(input_file_path, "r") as f: + if input_file_path.endswith(".jsonl"): experiment_prompts: list[dict] = [dict(json.loads(line)) for line in f] - elif self.input_file_path.endswith(".csv"): + elif input_file_path.endswith(".csv"): experiment_prompts: list[dict] = pd.read_csv(f).to_dict( orient="records" ) @@ -801,5 +802,5 @@ def save_completed_responses_to_csv(self, filename: str = None) -> None: if filename is None: filename = self.output_completed_jsonl_file_path.replace(".jsonl", ".csv") - logging.info(f"Saving completed responses (as csv) to {filename}...") + logging.info(f"Saving completed responses as csv to {filename}...") self.completed_responses_dataframe.to_csv(filename, index=False) diff --git a/tests/conftest.py b/tests/conftest.py index f34a519e..0386ac28 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -44,6 +44,7 @@ def temporary_data_folders(tmp_path: Path): ├── media/ ├── .env ├── test.txt + ├── test.csv └── test.jsonl """ # create data folders @@ -58,6 +59,11 @@ def 
temporary_data_folders(tmp_path: Path): with open(Path(tmp_path / "test.txt"), "w") as f: f.write("test line") + # create a csv file in the folder + with open(Path(tmp_path / "test.csv"), "w") as f: + f.write("prompt,api,model_name\n") + f.write("test prompt,test,test_model\n") + # create a jsonl file in the folder with open(Path(tmp_path / "test.jsonl"), "w") as f: f.write( diff --git a/tests/core/test_experiment.py b/tests/core/test_experiment.py index 4604d3a6..c490d1a3 100644 --- a/tests/core/test_experiment.py +++ b/tests/core/test_experiment.py @@ -1,6 +1,9 @@ import logging +import os +import pandas as pd import pytest +import regex as re from prompto.experiment import Experiment from prompto.settings import Settings @@ -19,7 +22,7 @@ def test_experiment_init_errors(temporary_data_folders): with pytest.raises(TypeError, match="missing 1 required positional argument"): Experiment(settings=Settings()) - # passing in a filename that is not a .jsonl file should raise a ValueError + # passing in a filename that is not a .jsonl or a .csv file should raise a ValueError with pytest.raises(ValueError, match="Experiment file must be a jsonl or csv file"): Experiment("test.txt", settings=Settings()) @@ -31,6 +34,43 @@ def test_experiment_init_errors(temporary_data_folders): Experiment("test.jsonl", settings=Settings()) +def test_experiment_read_input_file_jsonl(temporary_data_folders): + # create a jsonl file + with open("test_in_input.jsonl", "w") as f: + f.write( + '{"id": 0, "prompt": "test prompt 0", "api": "test", "model_name": "test_model"}\n' + ) + f.write( + '{"id": 1, "prompt": "test prompt 1", "api": "test", "model_name": "test_model"}\n' + ) + + experiment_prompts = Experiment._read_input_file("test_in_input.jsonl") + assert experiment_prompts == [ + {"id": 0, "prompt": "test prompt 0", "api": "test", "model_name": "test_model"}, + {"id": 1, "prompt": "test prompt 1", "api": "test", "model_name": "test_model"}, + ] + + +def test_experiment_read_input_file_csv(temporary_data_folders): + # create a csv file + with open("test_in_input.csv", "w") as f: + f.write("id,prompt,api,model_name\n") + f.write("0,test prompt 0,test,test_model\n") + f.write("1,test prompt 1,test,test_model\n") + + experiment_prompts = Experiment._read_input_file("test_in_input.csv") + assert experiment_prompts == [ + {"id": 0, "prompt": "test prompt 0", "api": "test", "model_name": "test_model"}, + {"id": 1, "prompt": "test prompt 1", "api": "test", "model_name": "test_model"}, + ] + + +def test_experiment_read_input_file_error(temporary_data_folders): + # passing in a filename that is not a .jsonl or a .csv file should raise a ValueError + with pytest.raises(ValueError, match="Experiment file must be a jsonl or csv file"): + Experiment._read_input_file("test.txt") + + def test_experiment_init(temporary_data_folders): # create a settings object settings = Settings(data_folder="data", max_queries=50, max_attempts=5) @@ -85,6 +125,161 @@ def test_experiment_init(temporary_data_folders): assert experiment._grouped_experiment_prompts == {} assert experiment.completed_responses == [] + assert experiment._completed_responses_dataframe is None + + +def test_completed_responses_dataframe_getter(temporary_data_folders): + # create a settings object + settings = Settings(data_folder="data", max_queries=50, max_attempts=5) + + # create a jsonl file in the input folder (which is created when initialising Settings object) + with open("data/input/test_in_input.jsonl", "w") as f: + f.write( + '{"id": 0, "prompt": "test prompt 
0", "api": "test", "model_name": "test_model"}\n' + ) + f.write( + '{"id": 1, "prompt": "test prompt 1", "api": "test", "model_name": "test_model"}\n' + ) + + # create an experiment object + experiment = Experiment("test_in_input.jsonl", settings=settings) + + # if experiment hasn't been ran yet, the dataframe should be None + assert experiment._completed_responses_dataframe is None + + # if trying to obtain it without first running experiment, it should raise an error + with pytest.raises( + ValueError, + match=re.escape( + "No completed responses to convert to a DataFrame " + "(completed_responses attribute is empty). " + "Run the process method to obtain the completed responses" + ), + ): + experiment.completed_responses_dataframe + + # we will set the completed_responses attribute to a list of dictionaries + # and then check that the dataframe is created correctly + experiment.completed_responses = [ + { + "id": 0, + "prompt": "test prompt 0", + "api": "test", + "model_name": "test_model", + "response": "response 0", + }, + { + "id": 1, + "prompt": "test prompt 1", + "api": "test", + "model_name": "test_model", + "response": "response 1", + }, + ] + + # check the dataframe is created correctly when calling the getter + assert isinstance(experiment.completed_responses_dataframe, pd.DataFrame) + assert experiment.completed_responses_dataframe.equals( + experiment._completed_responses_dataframe + ) + assert experiment.completed_responses_dataframe.equals( + pd.DataFrame( + { + "id": [0, 1], + "prompt": ["test prompt 0", "test prompt 1"], + "api": ["test", "test"], + "model_name": ["test_model", "test_model"], + "response": ["response 0", "response 1"], + } + ) + ) + + +def test_completed_responses_dataframe_getter_different_keys(temporary_data_folders): + # create a settings object + settings = Settings(data_folder="data", max_queries=50, max_attempts=5) + + # create a jsonl file in the input folder (which is created when initialising Settings object) + with open("data/input/test_in_input.jsonl", "w") as f: + f.write( + '{"id": 0, "prompt": "test prompt 0", "api": "test", "model_name": "test_model"}\n' + ) + f.write( + '{"id": 1, "prompt": "test prompt 1", "api": "test", "model_name": "test_model"}\n' + ) + + # create an experiment object + experiment = Experiment("test_in_input.jsonl", settings=settings) + + # if experiment hasn't been ran yet, the dataframe should be None + assert experiment._completed_responses_dataframe is None + + # if trying to obtain it without first running experiment, it should raise an error + with pytest.raises( + ValueError, + match=re.escape( + "No completed responses to convert to a DataFrame " + "(completed_responses attribute is empty). 
" + "Run the process method to obtain the completed responses" + ), + ): + experiment.completed_responses_dataframe + + # we will set the completed_responses attribute to a list of dictionaries + # and then check that the dataframe is created correctly + experiment.completed_responses = [ + { + "id": 0, + "prompt": "test prompt 0", + "api": "test", + "model_name": "test_model", + "response": "response 0", + }, + { + "id": 1, + "prompt": "test prompt 1", + "api": "test", + "model_name": "test_model", + "response": "response 1", + "extra_key": "extra_value", + }, + ] + + # check the dataframe is created correctly when calling the getter + assert isinstance(experiment.completed_responses_dataframe, pd.DataFrame) + assert experiment.completed_responses_dataframe.equals( + experiment._completed_responses_dataframe + ) + assert experiment.completed_responses_dataframe.equals( + pd.DataFrame( + { + "id": [0, 1], + "prompt": ["test prompt 0", "test prompt 1"], + "api": ["test", "test"], + "model_name": ["test_model", "test_model"], + "response": ["response 0", "response 1"], + "extra_key": [None, "extra_value"], + } + ) + ) + + +def test_completed_responses_dataframe_setter(temporary_data_folders): + # raise an error if trying to set the completed_responses_dataframe attribute + settings = Settings(data_folder="data", max_queries=50, max_attempts=5) + with open("data/input/test_in_input.jsonl", "w") as f: + f.write( + '{"id": 0, "prompt": "test prompt 0", "api": "test", "model_name": "test_model"}\n' + ) + f.write( + '{"id": 1, "prompt": "test prompt 1", "api": "test", "model_name": "test_model"}\n' + ) + + experiment = Experiment("test_in_input.jsonl", settings=settings) + with pytest.raises( + AttributeError, match="Cannot set the completed_responses_dataframe attribute" + ): + experiment.completed_responses_dataframe = pd.DataFrame() def test_experiment_grouped_prompts_simple(temporary_data_folders, caplog): @@ -2740,3 +2935,133 @@ def test_rate_limit_docs_example_6(temporary_rate_limit_doc_examples): "gemini": "4 queries at 5 queries per minute", "openai": "4 queries at 5 queries per minute", } + + +def test_obtain_completed_responses_dataframe(temporary_data_folders): + settings = Settings(data_folder="data", max_queries=50, max_attempts=5) + with open("data/input/test_in_input.jsonl", "w") as f: + f.write( + '{"id": 0, "prompt": "test prompt 0", "api": "test", "model_name": "test_model"}\n' + ) + f.write( + '{"id": 1, "prompt": "test prompt 1", "api": "test", "model_name": "test_model"}\n' + ) + + experiment = Experiment("test_in_input.jsonl", settings=settings) + + # if experiment note run, calling this method should raise an error + with pytest.raises( + ValueError, + match=re.escape( + "No completed responses to convert to a DataFrame " + "(completed_responses attribute is empty). 
" + "Run the process method to obtain the completed responses" + ), + ): + experiment._obtain_completed_responses_dataframe() + + # we will set the completed_responses attribute to a list of dictionaries + # and then check that the dataframe is created correctly + experiment.completed_responses = [ + { + "id": 0, + "prompt": "test prompt 0", + "api": "test", + "model_name": "test_model", + "response": "response 0", + }, + { + "id": 1, + "prompt": "test prompt 1", + "api": "test", + "model_name": "test_model", + "response": "response 1", + }, + ] + + # check the dataframe is created correctly when calling the getter + assert isinstance(experiment._obtain_completed_responses_dataframe(), pd.DataFrame) + assert experiment._obtain_completed_responses_dataframe().equals( + pd.DataFrame( + { + "id": [0, 1], + "prompt": ["test prompt 0", "test prompt 1"], + "api": ["test", "test"], + "model_name": ["test_model", "test_model"], + "response": ["response 0", "response 1"], + } + ) + ) + + +def test_save_completed_responses_to_csv(temporary_data_folders, caplog): + caplog.set_level(logging.INFO) + + settings = Settings(data_folder="data", max_queries=50, max_attempts=5) + with open("data/input/test_in_input.jsonl", "w") as f: + f.write( + '{"id": 0, "prompt": "test prompt 0", "api": "test", "model_name": "test_model"}\n' + ) + f.write( + '{"id": 1, "prompt": "test prompt 1", "api": "test", "model_name": "test_model"}\n' + ) + + experiment = Experiment("test_in_input.jsonl", settings=settings) + + # we will set the completed_responses attribute to a list of dictionaries + # and then check that the dataframe is created correctly + experiment.completed_responses = [ + { + "id": 0, + "prompt": "test prompt 0", + "api": "test", + "model_name": "test_model", + "response": "response 0", + }, + { + "id": 1, + "prompt": "test prompt 1", + "api": "test", + "model_name": "test_model", + "response": "response 1", + }, + ] + + # save the completed responses to a csv file + experiment.save_completed_responses_to_csv("test_out.csv") + + # check the csv file is created correctly + assert os.path.exists("test_out.csv") + + # check the csv file content + expected = pd.DataFrame( + { + "id": [0, 1], + "prompt": ["test prompt 0", "test prompt 1"], + "api": ["test", "test"], + "model_name": ["test_model", "test_model"], + "response": ["response 0", "response 1"], + } + ) + loaded_csv = pd.read_csv("test_out.csv") + assert loaded_csv.equals(expected) + + # check logs + assert "Saving completed responses as csv to test_out.csv" in caplog.text + + # save the completed responses to a csv file without specifying the file name + os.makedirs("data/output/test_in_input/", exist_ok=True) + experiment.save_completed_responses_to_csv() + + # check the csv file is created correctly + filename = ( + f"data/output/test_in_input/{experiment.start_time}-completed-test_in_input.csv" + ) + assert os.path.exists(filename) + + # check the csv file content + loaded_csv = pd.read_csv(filename) + assert loaded_csv.equals(expected) + + # check logs + assert f"Saving completed responses as csv to {filename}" in caplog.text From 002aa4f8c76f59ea6e67b450cbc9af37c989d541 Mon Sep 17 00:00:00 2001 From: rchan Date: Tue, 24 Sep 2024 11:51:53 +0100 Subject: [PATCH 4/7] add logic for case where parameters are in csv --- src/prompto/experiment.py | 39 ++++++++- tests/core/test_experiment.py | 146 +++++++++++++++++++++++++++++++++- 2 files changed, 180 insertions(+), 5 deletions(-) diff --git a/src/prompto/experiment.py b/src/prompto/experiment.py index 
4a6f23a7..8845cc70 100644 --- a/src/prompto/experiment.py +++ b/src/prompto/experiment.py @@ -115,11 +115,35 @@ def __str__(self) -> str: def _read_input_file(input_file_path) -> list[dict]: with open(input_file_path, "r") as f: if input_file_path.endswith(".jsonl"): + logging.info( + f"Loading experiment prompts from jsonl file {input_file_path}..." + ) experiment_prompts: list[dict] = [dict(json.loads(line)) for line in f] elif input_file_path.endswith(".csv"): - experiment_prompts: list[dict] = pd.read_csv(f).to_dict( - orient="records" + logging.info( + f"Loading experiment prompts from csv file {input_file_path}..." ) + loaded_df = pd.read_csv(f) + parameters_col_names = [ + col for col in loaded_df.columns if "parameters-" in col + ] + if len(parameters_col_names) > 0: + # take the "parameters-" column names and create new column "parameters" + # with the values as a dictionary of the parameters + logging.info(f"Found parameters columns: {parameters_col_names}") + loaded_df["parameters"] = [ + { + parameter.removeprefix("parameters-"): row[parameter] + for parameter in parameters_col_names + if not pd.isna(row[parameter]) + } + for _, row in tqdm( + loaded_df.iterrows(), + desc="Parsing parameters columns for data frame", + unit="row", + ) + ] + experiment_prompts: list[dict] = loaded_df.to_dict(orient="records") else: raise ValueError("Experiment file must be a jsonl or csv file") @@ -803,4 +827,13 @@ def save_completed_responses_to_csv(self, filename: str = None) -> None: filename = self.output_completed_jsonl_file_path.replace(".jsonl", ".csv") logging.info(f"Saving completed responses as csv to {filename}...") - self.completed_responses_dataframe.to_csv(filename, index=False) + if "parameters" in self.completed_responses_dataframe.columns: + # make a copy and convert the parameters column (which should be of dict type) to a json string + completed_responses_dataframe = self.completed_responses_dataframe.copy() + completed_responses_dataframe["parameters"] = completed_responses_dataframe[ + "parameters" + ].apply(json.dumps) + else: + completed_responses_dataframe = self.completed_responses_dataframe + + completed_responses_dataframe.to_csv(filename, index=False) diff --git a/tests/core/test_experiment.py b/tests/core/test_experiment.py index c490d1a3..e89fb448 100644 --- a/tests/core/test_experiment.py +++ b/tests/core/test_experiment.py @@ -34,7 +34,8 @@ def test_experiment_init_errors(temporary_data_folders): Experiment("test.jsonl", settings=Settings()) -def test_experiment_read_input_file_jsonl(temporary_data_folders): +def test_experiment_read_input_file_jsonl(temporary_data_folders, caplog): + caplog.set_level(logging.INFO) # create a jsonl file with open("test_in_input.jsonl", "w") as f: f.write( @@ -49,9 +50,13 @@ def test_experiment_read_input_file_jsonl(temporary_data_folders): {"id": 0, "prompt": "test prompt 0", "api": "test", "model_name": "test_model"}, {"id": 1, "prompt": "test prompt 1", "api": "test", "model_name": "test_model"}, ] + assert ( + "Loading experiment prompts from jsonl file test_in_input.jsonl" in caplog.text + ) -def test_experiment_read_input_file_csv(temporary_data_folders): +def test_experiment_read_input_file_csv(temporary_data_folders, caplog): + caplog.set_level(logging.INFO) # create a csv file with open("test_in_input.csv", "w") as f: f.write("id,prompt,api,model_name\n") @@ -63,6 +68,62 @@ def test_experiment_read_input_file_csv(temporary_data_folders): {"id": 0, "prompt": "test prompt 0", "api": "test", "model_name": "test_model"}, {"id": 
1, "prompt": "test prompt 1", "api": "test", "model_name": "test_model"}, ] + assert "Loading experiment prompts from csv file test_in_input.csv" in caplog.text + + +def test_experiment_read_input_file_csv_with_parameters(temporary_data_folders, caplog): + caplog.set_level(logging.INFO) + # create a csv file + with open("test_in_input.csv", "w") as f: + f.write( + "id,prompt,api,model_name,parameters-temperature,parameters-max-output-tokens\n" + ) + f.write("0,test prompt 0,test,test_model,0.9,100\n") + f.write("1,test prompt 1,test,test_model,None,100\n") + f.write("2,test prompt 2,test,test_model,,100\n") + + experiment_prompts = Experiment._read_input_file("test_in_input.csv") + + # a hack to compare the dictionaries without worrying about the NaN values + # NaNs should occur in experiment_prompts[1]["parameters-temperature"] and experiment_prompts[2]["parameters-temperature"] + assert pd.isna(experiment_prompts[1]["parameters-temperature"]) + assert pd.isna(experiment_prompts[2]["parameters-temperature"]) + # remove them for now + experiment_prompts[1].pop("parameters-temperature") + experiment_prompts[2].pop("parameters-temperature") + + assert experiment_prompts == [ + { + "id": 0, + "prompt": "test prompt 0", + "api": "test", + "model_name": "test_model", + "parameters-temperature": 0.9, + "parameters-max-output-tokens": 100, + "parameters": {"temperature": 0.9, "max-output-tokens": 100}, + }, + { + "id": 1, + "prompt": "test prompt 1", + "api": "test", + "model_name": "test_model", + "parameters-max-output-tokens": 100, + "parameters": {"max-output-tokens": 100}, + }, + { + "id": 2, + "prompt": "test prompt 2", + "api": "test", + "model_name": "test_model", + "parameters-max-output-tokens": 100, + "parameters": {"max-output-tokens": 100}, + }, + ] + assert ( + "Found parameters columns: ['parameters-temperature', 'parameters-max-output-tokens']" + in caplog.text + ) + assert "Loading experiment prompts from csv file test_in_input.csv" in caplog.text def test_experiment_read_input_file_error(temporary_data_folders): @@ -3065,3 +3126,84 @@ def test_save_completed_responses_to_csv(temporary_data_folders, caplog): # check logs assert f"Saving completed responses as csv to {filename}" in caplog.text + + +def test_save_completed_responses_to_csv_with_parameters( + temporary_data_folders, caplog +): + caplog.set_level(logging.INFO) + + settings = Settings(data_folder="data", max_queries=50, max_attempts=5) + with open("data/input/test_in_input.jsonl", "w") as f: + f.write( + '{"id": 0, "prompt": "test prompt 0", "api": "test", "model_name": "test_model"}\n' + ) + f.write( + '{"id": 1, "prompt": "test prompt 1", "api": "test", "model_name": "test_model"}\n' + ) + + experiment = Experiment("test_in_input.jsonl", settings=settings) + + # we will set the completed_responses attribute to a list of dictionaries + # and then check that the dataframe is created correctly + experiment.completed_responses = [ + { + "id": 0, + "prompt": "test prompt 0", + "api": "test", + "model_name": "test_model", + "response": "response 0", + "parameters": {"temperature": 0.5, "max_tokens": 100}, + }, + { + "id": 1, + "prompt": "test prompt 1", + "api": "test", + "model_name": "test_model", + "response": "response 1", + "parameters": {"max_tokens": 100}, + }, + ] + + # save the completed responses to a csv file + experiment.save_completed_responses_to_csv("test_out.csv") + + # check the csv file is created correctly + assert os.path.exists("test_out.csv") + + # check the csv file content + expected = 
pd.DataFrame( + { + "id": [0, 1], + "prompt": ["test prompt 0", "test prompt 1"], + "api": ["test", "test"], + "model_name": ["test_model", "test_model"], + "response": ["response 0", "response 1"], + "parameters": [ + '{"temperature": 0.5, "max_tokens": 100}', + '{"max_tokens": 100}', + ], + } + ) + loaded_csv = pd.read_csv("test_out.csv") + assert loaded_csv.equals(expected) + + # check logs + assert "Saving completed responses as csv to test_out.csv" in caplog.text + + # save the completed responses to a csv file without specifying the file name + os.makedirs("data/output/test_in_input/", exist_ok=True) + experiment.save_completed_responses_to_csv() + + # check the csv file is created correctly + filename = ( + f"data/output/test_in_input/{experiment.start_time}-completed-test_in_input.csv" + ) + assert os.path.exists(filename) + + # check the csv file content + loaded_csv = pd.read_csv(filename) + assert loaded_csv.equals(expected) + + # check logs + assert f"Saving completed responses as csv to {filename}" in caplog.text From 20a7edabc10d1a81c1b0643932764f706540f8e9 Mon Sep 17 00:00:00 2001 From: rchan Date: Tue, 24 Sep 2024 13:01:00 +0100 Subject: [PATCH 5/7] test experiment processing with csv input --- src/prompto/apis/testing/testing_api.py | 4 +- src/prompto/experiment.py | 17 ++- tests/conftest.py | 13 +++ tests/core/test_experiment.py | 88 +++++++++++++++ tests/core/test_experiment_process.py | 141 +++++++++++++++++++++++- 5 files changed, 254 insertions(+), 9 deletions(-) diff --git a/src/prompto/apis/testing/testing_api.py b/src/prompto/apis/testing/testing_api.py index 7d40f627..8cb37174 100644 --- a/src/prompto/apis/testing/testing_api.py +++ b/src/prompto/apis/testing/testing_api.py @@ -33,9 +33,9 @@ async def query(self, prompt_dict: dict, index: int | str) -> dict: raise_error_option = generation_config.get("raise_error", "") raise_error_type = generation_config.get("raise_error_type", "") - if raise_error_option == "True": + if raise_error_option.lower() in ["true", "yes"]: raise_error = True - elif raise_error_option == "False": + elif raise_error_option.lower() in ["false", "no"]: raise_error = False else: raise_error = random.randint(1, 5) == 1 diff --git a/src/prompto/experiment.py b/src/prompto/experiment.py index 8845cc70..6c76efff 100644 --- a/src/prompto/experiment.py +++ b/src/prompto/experiment.py @@ -50,7 +50,9 @@ def __init__( self.file_name: str = file_name # obtain experiment name from file name - self.experiment_name: str = self.file_name.removesuffix(".jsonl") + self.experiment_name: str = self.file_name.removesuffix(".jsonl").removesuffix( + ".csv" + ) # settings for the pipeline which includes input, output, and media folder locations self.settings: Settings = settings # experiment output folder is a subfolder of the output folder @@ -112,7 +114,7 @@ def __str__(self) -> str: return self.file_name @staticmethod - def _read_input_file(input_file_path) -> list[dict]: + def _read_input_file(input_file_path: str) -> list[dict]: with open(input_file_path, "r") as f: if input_file_path.endswith(".jsonl"): logging.info( @@ -648,6 +650,10 @@ async def query_model_and_record_response( ) if index is None: index = "NA" + id = prompt_dict.get("id", "NA") + # if id is NaN, set it to "NA" + if pd.isna(id): + id = "NA" # query the API timeout_seconds = 300 @@ -669,8 +675,7 @@ async def query_model_and_record_response( ) as err: # don't retry for selected errors, log the error and save an error response log_message = ( - f"Error (i={index}, id={prompt_dict.get('id', 
'NA')}): " - f"{type(err).__name__} - {err}" + f"Error (i={index}, id={id}): " f"{type(err).__name__} - {err}" ) async with FILE_WRITE_LOCK: write_log_message( @@ -683,7 +688,7 @@ async def query_model_and_record_response( if attempt == self.settings.max_attempts: # we've already tried max_attempts times, so log the error and save an error response log_message = ( - f"Error (i={index}, id={prompt_dict.get('id', 'NA')}) " + f"Error (i={index}, id={id}) " f"after maximum {self.settings.max_attempts} attempts: " f"{type(err).__name__} - {err}" ) @@ -701,7 +706,7 @@ async def query_model_and_record_response( else: # we haven't tried max_attempts times yet, so log the error and return an Exception log_message = ( - f"Error (i={index}, id={prompt_dict.get('id', 'NA')}) on attempt " + f"Error (i={index}, id={id}) on attempt " f"{attempt} of {self.settings.max_attempts}: " f"{type(err).__name__} - {err}. Adding to the queue to try again later..." ) diff --git a/tests/conftest.py b/tests/conftest.py index 0386ac28..9fb16eae 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -449,6 +449,7 @@ def temporary_data_folder_for_processing(tmp_path: Path): tmp_path ├── data/ ├── input/ + ├── test_experiment.csv ├── test_experiment.jsonl ├── test_experiment_with_groups.jsonl └── test_experiment_eval.jsonl @@ -463,6 +464,18 @@ def temporary_data_folder_for_processing(tmp_path: Path): Path(tmp_path / "data" / "output").mkdir() Path(tmp_path / "data" / "media").mkdir() + # create a csv file with a few prompts + with open(Path(tmp_path / "data" / "input" / "test_experiment.csv"), "w") as f: + f.write( + "id,prompt,api,model_name,parameters-raise_error,parameters-raise_error_type\n" + ) + f.write('0,test prompt 1,test,model1,"no",\n') + f.write(',test prompt 2,test,model1,"yes","Exception"\n') + f.write('1,test prompt 3,test,model1,"yes",\n') + f.write('2,test prompt 4,test,model2,"no",\n') + f.write('3,test prompt 5,test,model2,"no",\n') + f.write('4,test prompt 6,test,model2,"no",\n') + # create a file with larger number of prompts with different APIs, models with no groups with open(Path(tmp_path / "data" / "input" / "test_experiment.jsonl"), "w") as f: f.write( diff --git a/tests/core/test_experiment.py b/tests/core/test_experiment.py index e89fb448..3a6d38a4 100644 --- a/tests/core/test_experiment.py +++ b/tests/core/test_experiment.py @@ -189,6 +189,94 @@ def test_experiment_init(temporary_data_folders): assert experiment._completed_responses_dataframe is None +def test_experiment_init_csv(temporary_data_folders): + # create a settings object + settings = Settings(data_folder="data", max_queries=50, max_attempts=5) + + # create a csv file in the input folder (which is created when initialising Settings object) + with open("data/input/test_in_input.csv", "w") as f: + f.write( + "id,prompt,api,model_name,parameters-temperature,parameters-max-output-tokens\n" + ) + f.write("0,test prompt 0,test,test_model,0.9,100\n") + f.write("1,test prompt 1,test,test_model,0.5,100\n") + + # create an experiment object + experiment = Experiment("test_in_input.csv", settings=settings) + + # check the experiment object has the correct attributes + assert experiment.file_name == "test_in_input.csv" + assert experiment.experiment_name == "test_in_input" + assert experiment.settings == settings + assert experiment.output_folder == "data/output/test_in_input" + assert experiment.input_file_path == "data/input/test_in_input.csv" + assert isinstance(experiment.creation_time, str) + assert isinstance(experiment.start_time, 
str) + assert ( + experiment.output_completed_jsonl_file_path + == f"data/output/test_in_input/{experiment.start_time}-completed-test_in_input.jsonl" + ) + assert ( + experiment.output_input_jsonl_file_out_path + == f"data/output/test_in_input/{experiment.start_time}-input-test_in_input.jsonl" + ) + assert experiment._experiment_prompts == [ + { + "id": 0, + "prompt": "test prompt 0", + "api": "test", + "model_name": "test_model", + "parameters-temperature": 0.9, + "parameters-max-output-tokens": 100, + "parameters": {"temperature": 0.9, "max-output-tokens": 100}, + }, + { + "id": 1, + "prompt": "test prompt 1", + "api": "test", + "model_name": "test_model", + "parameters-temperature": 0.5, + "parameters-max-output-tokens": 100, + "parameters": {"temperature": 0.5, "max-output-tokens": 100}, + }, + ] + # check property getter for experiment_prompts + assert experiment.experiment_prompts == [ + { + "id": 0, + "prompt": "test prompt 0", + "api": "test", + "model_name": "test_model", + "parameters-temperature": 0.9, + "parameters-max-output-tokens": 100, + "parameters": {"temperature": 0.9, "max-output-tokens": 100}, + }, + { + "id": 1, + "prompt": "test prompt 1", + "api": "test", + "model_name": "test_model", + "parameters-temperature": 0.5, + "parameters-max-output-tokens": 100, + "parameters": {"temperature": 0.5, "max-output-tokens": 100}, + }, + ] + assert experiment.number_queries == 2 + assert ( + experiment.log_file + == f"data/output/test_in_input/{experiment.start_time}-log-test_in_input.txt" + ) + + # test str method + assert str(experiment) == "test_in_input.csv" + + # test that grouped experiments have not been created yet + assert experiment._grouped_experiment_prompts == {} + + assert experiment.completed_responses == [] + assert experiment._completed_responses_dataframe is None + + def test_completed_responses_dataframe_getter(temporary_data_folders): # create a settings object settings = Settings(data_folder="data", max_queries=50, max_attempts=5) diff --git a/tests/core/test_experiment_process.py b/tests/core/test_experiment_process.py index 90708bdb..ccfaf736 100644 --- a/tests/core/test_experiment_process.py +++ b/tests/core/test_experiment_process.py @@ -152,6 +152,146 @@ async def test_process( assert log_msg in caplog.text +@pytest.mark.asyncio +async def test_process_using_csv( + temporary_data_folder_for_processing: None, + caplog: pytest.LogCaptureFixture, + capsys: pytest.CaptureFixture[str], +): + caplog.set_level(logging.INFO) + settings = Settings(data_folder="data", max_attempts=2, max_queries=200) + experiment = Experiment("test_experiment.csv", settings=settings) + + assert experiment.completed_responses == [] + assert not os.path.isdir(experiment.output_folder) + + result, avg_query_proc_time = await experiment.process() + + # assert that the output folder was created and input file was moved to it + assert os.path.isdir(experiment.output_folder) + assert not os.path.isfile("data/input/test_experiment.csv") + assert len(os.listdir("data/output/test_experiment")) == 4 + # assert created files in output + assert os.path.isfile( + f"data/output/test_experiment/{experiment.start_time}-input-test_experiment.csv" + ) + assert os.path.isfile( + f"data/output/test_experiment/{experiment.start_time}-completed-test_experiment.jsonl" + ) + assert os.path.isfile( + f"data/output/test_experiment/{experiment.start_time}-input-test_experiment.jsonl" + ) + assert os.path.isfile( + f"data/output/test_experiment/{experiment.start_time}-log-test_experiment.txt" + ) + + # check 
processing time + assert isinstance(avg_query_proc_time, float) + assert avg_query_proc_time > 0 + + # check result + assert len(result) == 6 + assert experiment.completed_responses == result + + # check that the response is saved to the output file + assert os.path.exists(experiment.output_completed_jsonl_file_path) + with open(experiment.output_completed_jsonl_file_path, "r") as f: + responses = [dict(json.loads(line)) for line in f] + + assert responses == result + + # check the content printed to the console (tqdm progress bar) + captured = capsys.readouterr() + print_msg = "Sending 6 queries at 200 QPM with RI of 0.3s (attempt 1/2)" + assert print_msg in captured.err + print_msg = "Waiting for responses (attempt 1/2)" + assert print_msg in captured.err + print_msg = "Sending 1 queries at 200 QPM with RI of 0.3s (attempt 2/2)" + assert print_msg in captured.err + print_msg = "Waiting for responses (attempt 2/2)" + assert print_msg in captured.err + + # check log messages + log_msg = "Processing experiment: test_experiment.csv.." + assert log_msg in caplog.text + log_msg = ( + "Moving data/input/test_experiment.csv to " + "data/output/test_experiment as " + "data/output/test_experiment/" + f"{experiment.start_time}-input-test_experiment.csv" + ) + assert log_msg in caplog.text + log_msg = ( + "Converting data/input/test_experiment.csv to jsonl file for processing..." + ) + assert log_msg in caplog.text + log_msg = ( + "Moving data/input/test_experiment.jsonl to " + "data/output/test_experiment as " + "data/output/test_experiment/" + f"{experiment.start_time}-input-test_experiment.jsonl" + ) + log_msg = "Sending 6 queries..." + assert log_msg in caplog.text + log_msg = ( + "Response received for model test (i=1, id=0.0)\n" + "Prompt: test prompt 1...\n" + "Response: This is a test response...\n" + ) + assert log_msg in caplog.text + log_msg = ( + "Error (i=2, id=NA) on attempt 1 of 2: " + "Exception - This is a test error which we should handle and return. " + "Adding to the queue to try again later..." + ) + assert log_msg in caplog.text + log_msg = "Error (i=3, id=1.0): ValueError - This is a test error which we should handle and return" + assert log_msg in caplog.text + log_msg = ( + "Error with model test (i=3, id=1.0)\n" + "Prompt: test prompt 3...\n" + "Error: This is a test error which we should handle and return\n" + ) + log_msg = ( + "Response received for model test (i=4, id=2.0)\n" + "Prompt: test prompt 4...\n" + "Response: This is a test response...\n" + ) + assert log_msg in caplog.text + log_msg = ( + "Response received for model test (i=5, id=3.0)\n" + "Prompt: test prompt 5...\n" + "Response: This is a test response...\n" + ) + assert log_msg in caplog.text + log_msg = ( + "Response received for model test (i=6, id=4.0)\n" + "Prompt: test prompt 6...\n" + "Response: This is a test response...\n" + ) + assert log_msg in caplog.text + log_msg = "Retrying 1 failed queries - attempt 2 of 2..." + assert log_msg in caplog.text + log_msg = ( + "Error (i=1, id=NA) after maximum 2 attempts: " + "Exception - This is a test error which we should handle and return" + ) + assert log_msg in caplog.text + log_msg = ( + "Error with model test (i=1, id=NA)\n" + "Prompt: test prompt 2...\n" + "Error: This is a test error which we should handle and return\n" + ) + log_msg = "Maximum attempts reached. Exiting..." + assert log_msg in caplog.text + log_msg = "Completed experiment: test_experiment.csv! 
" + assert log_msg in caplog.text + log_msg = "Experiment processing time: " + assert log_msg in caplog.text + log_msg = "Average time per query: " + assert log_msg in caplog.text + + @pytest.mark.asyncio async def test_process_with_max_queries_dict( temporary_data_folder_for_processing: None, caplog, capsys @@ -777,6 +917,5 @@ async def test_process_with_evaluation( # check that the evaluation function has been applied assert responses == result - print(responses) assert all(["evaluation" in response for response in responses]) assert all([response["evaluation"] is True for response in responses]) From fbb36d4963fda8423657c0f3e76bbf4d4379ee15 Mon Sep 17 00:00:00 2001 From: rchan Date: Tue, 24 Sep 2024 13:35:03 +0100 Subject: [PATCH 6/7] test run_experiment command with csv input and output --- tests/conftest.py | 18 +++-- tests/scripts/test_run_experiment.py | 107 +++++++++++++++++++++++++++ 2 files changed, 119 insertions(+), 6 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 9fb16eae..894d126c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -524,10 +524,10 @@ def temporary_data_folder_for_processing(tmp_path: Path): Path(tmp_path / "data" / "input" / "test_experiment_eval.jsonl"), "w" ) as f: f.write( - '{"id": 0, "api": "test", "model1": "test_model", "prompt": "test prompt 1", "parameters": {"raise_error": "False"}}\n' + '{"id": 0, "api": "test", "model_name": "model1", "prompt": "test prompt 1", "parameters": {"raise_error": "False"}}\n' ) f.write( - '{"id": 1, "api": "test", "model2": "test_model", "prompt": "test prompt 2", "parameters": {"raise_error": "False"}}\n' + '{"id": 1, "api": "test", "model_name": "model2", "prompt": "test prompt 2", "parameters": {"raise_error": "False"}}\n' ) # create a file with max queries dictionary @@ -583,19 +583,25 @@ def temporary_data_folder_judge(tmp_path: Path): # create input experiment file not in input folder with open(Path(tmp_path / "test-exp-not-in-input.jsonl"), "w") as f: f.write( - '{"id": 0, "api": "test", "model1": "test_model", "prompt": "test prompt 1", "parameters": {"raise_error": "False"}}\n' + '{"id": 0, "api": "test", "model_name": "model1", "prompt": "test prompt 1", "parameters": {"raise_error": "False"}}\n' ) f.write( - '{"id": 1, "api": "test", "model2": "test_model", "prompt": "test prompt 2", "parameters": {"raise_error": "False"}}\n' + '{"id": 1, "api": "test", "model_name": "model2", "prompt": "test prompt 2", "parameters": {"raise_error": "False"}}\n' ) + # create input csv file in input folder + with open(Path(tmp_path / "data" / "input" / "test-experiment.csv"), "w") as f: + f.write("id,prompt,api,model_name,parameters-raise_error,expected_response\n") + f.write('0,test prompt 1,test,model1,"no","This is a test response"\n') + f.write('1,test prompt 2,test,model2,"no","something else"\n') + # create input experiment file in input folder with open(Path(tmp_path / "data" / "input" / "test-experiment.jsonl"), "w") as f: f.write( - '{"id": 0, "api": "test", "model1": "test_model", "prompt": "test prompt 1", "parameters": {"raise_error": "False"}, "expected_response": "This is a test response"}\n' + '{"id": 0, "api": "test", "model_name": "model1", "prompt": "test prompt 1", "parameters": {"raise_error": "False"}, "expected_response": "This is a test response"}\n' ) f.write( - '{"id": 1, "api": "test", "model2": "test_model", "prompt": "test prompt 2", "parameters": {"raise_error": "False"}, "expected_response": "something else"}\n' + '{"id": 1, "api": "test", "model_name": "model2", 
"prompt": "test prompt 2", "parameters": {"raise_error": "False"}, "expected_response": "something else"}\n' ) # create a completed experiment file with "response" key in output folder diff --git a/tests/scripts/test_run_experiment.py b/tests/scripts/test_run_experiment.py index 267348a5..e6dc1f33 100644 --- a/tests/scripts/test_run_experiment.py +++ b/tests/scripts/test_run_experiment.py @@ -766,3 +766,110 @@ def test_run_experiment_judge_and_scorer(temporary_data_folder_judge): assert response["input-includes"] is False else: assert False + + +def test_run_experiment_judge_and_scorer_with_csv_input_and_output( + temporary_data_folder_judge, +): + result = shell( + "prompto_run_experiment " + "--file data/input/test-experiment.csv " + "--max-queries=200 " + "--judge-folder judge_loc " + "--templates template.txt,template2.txt " + "--judge judge2 " + "--scorer 'match, includes' " + "--output-as-csv" + ) + assert result.exit_code == 0 + assert "No environment file found at .env" in result.stderr + assert "Judge folder loaded from judge_loc" in result.stderr + assert "Templates to be used: ['template.txt', 'template2.txt']" in result.stderr + assert "Judges to be used: ['judge2']" in result.stderr + assert "Scoring functions to be used: ['match', 'includes']" in result.stderr + assert ( + "Settings: " + "data_folder=data, " + "max_queries=200, " + "max_attempts=5, " + "parallel=False\n" + "Subfolders: " + "input_folder=data/input, " + "output_folder=data/output, " + "media_folder=data/media" + ) in result.stderr + assert ( + "Starting processing experiment: data/input/test-experiment.csv..." + in result.stderr + ) + assert "Completed experiment: test-experiment.csv" in result.stderr + assert ( + "Starting processing judge of experiment: judge-test-experiment.jsonl..." + in result.stderr + ) + assert "Completed experiment: judge-test-experiment.jsonl" in result.stderr + assert "Experiment processed successfully!" 
in result.stderr + assert os.path.isdir("data/output/test-experiment") + assert os.path.isdir("data/output/judge-test-experiment") + + # check the output files for the test-experiment + completed_files = [ + x for x in os.listdir("data/output/test-experiment") if "completed" in x + ] + # should be 2 (one jsonl and one csv) + assert len(completed_files) == 2 + completed_jsonl_file = [ + file for file in completed_files if file.endswith(".jsonl") + ][0] + completed_csv_files = [file for file in completed_files if file.endswith(".csv")] + assert len(completed_csv_files) == 1 + + # load the output to check the scores have been added + with open(f"data/output/test-experiment/{completed_jsonl_file}", "r") as f: + responses = [dict(json.loads(line)) for line in f] + + # test that the scorers got added to the completed file + assert len(responses) == 2 + for response in responses: + if response["id"] == 0: + assert response["match"] is True + assert response["includes"] is True + elif response["id"] == 1: + assert response["match"] is False + assert response["includes"] is False + else: + assert False + + # check the output files for the judge-test-experiment + completed_files = [ + x for x in os.listdir("data/output/judge-test-experiment") if "completed" in x + ] + # should be 2 (one jsonl and one csv) + assert len(completed_files) == 2 + completed_jsonl_file = [ + file for file in completed_files if file.endswith(".jsonl") + ][0] + completed_csv_files = [file for file in completed_files if file.endswith(".csv")] + assert len(completed_csv_files) == 1 + + # load the output to check the scores have been added + with open(f"data/output/judge-test-experiment/{completed_jsonl_file}", "r") as f: + responses = [dict(json.loads(line)) for line in f] + + # test that the scorers got added to the completed judge file + assert len(responses) == 4 + for response in responses: + if response["id"] == "judge-judge2-template-0": + assert response["input-match"] is True + assert response["input-includes"] is True + elif response["id"] == "judge-judge2-template-1": + assert response["input-match"] is False + assert response["input-includes"] is False + elif response["id"] == "judge-judge2-template2-0": + assert response["input-match"] is True + assert response["input-includes"] is True + elif response["id"] == "judge-judge2-template2-1": + assert response["input-match"] is False + assert response["input-includes"] is False + else: + assert False From 3398e4016624d62920a547ba641c13411452682f Mon Sep 17 00:00:00 2001 From: rchan Date: Tue, 24 Sep 2024 13:47:14 +0100 Subject: [PATCH 7/7] bump version to 0.2.0 and update docs --- README.md | 2 +- docs/experiment_file.md | 20 ++++++++++++++++++++ pyproject.toml | 2 +- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c940e1b1..20ae72b3 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ A pre-print for this work is available on [arXiv](https://arxiv.org/abs/2408.118 The benefit of _asynchronous querying_ is that it allows for multiple requests to be sent to an API _without_ having to wait for the LLM's response, which is particularly useful to fully utilise the rate limits of an API. This is especially useful when an experiment file contains a large number of prompts and/or has several models to query. 
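As a rough illustration of this concurrency (a minimal sketch with a placeholder `mock_query` coroutine, not prompto's own code), ten requests sent with `asyncio` finish in roughly the time of the slowest single response rather than the sum of all of them:

```python
import asyncio
import random


async def mock_query(prompt: str) -> str:
    # stand-in for an API call: pretend the model takes 1-2 seconds to reply
    await asyncio.sleep(random.uniform(1, 2))
    return f"response to: {prompt}"


async def main() -> list[str]:
    prompts = [f"prompt {i}" for i in range(10)]
    # schedule every query up front and wait for them together,
    # rather than awaiting each response before sending the next
    return await asyncio.gather(*(mock_query(p) for p in prompts))


if __name__ == "__main__":
    print(asyncio.run(main()))
```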
[_Asynchronous programming_](https://docs.python.org/3/library/asyncio.html) is simply a way for programs to avoid getting stuck on long tasks (like waiting for an LLM response from an API) and instead keep running other things at the same time (to send other queries). -With `prompto`, you are able to define your experiments of LLMs in a jsonl file where each line contains the prompt and any parameters to be used for a query of a model from a specific API. The library will process the experiment file and query models and store results. You are also able to query _multiple_ models from _different_ APIs in a single experiment file and `prompto` will take care of querying the models _asynchronously_ and in _parallel_. +With `prompto`, you are able to define your experiments of LLMs in a jsonl or csv file where each line/row contains the prompt and any parameters to be used for a query of a model from a specific API. The library will process the experiment file and query models and store results. You are also able to query _multiple_ models from _different_ APIs in a single experiment file and `prompto` will take care of querying the models _asynchronously_ and in _parallel_. The library is designed to be extensible and can be used to query different models. diff --git a/docs/experiment_file.md b/docs/experiment_file.md index 77b7d364..dce8d144 100644 --- a/docs/experiment_file.md +++ b/docs/experiment_file.md @@ -2,6 +2,8 @@ An experiment file is a [JSON Lines (jsonl)](https://jsonlines.org/) file that contains the prompts for the experiments along with any other parameters or metadata that is required for the prompt. Each line in the jsonl file is a valid JSON value which defines a particular input to the LLM which we will obtain a response for. We often refer to a single line in the jsonl file as a "`prompt_dict`" (prompt dictionary). +From `prompto` version 0.2.0 onwards, it's also possible to use `csv` files as input to the pipeline. See the [CSV input section](#csv-input) for more details. + For all models/APIs, we require the following keys in the `prompt_dict`: * `prompt`: the prompt for the model @@ -15,6 +17,9 @@ For all models/APIs, we require the following keys in the `prompt_dict`: In addition, there are other optional keys that can be included in the `prompt_dict`: +* `id`: a unique identifier for the prompt + * This is a string that can be used to uniquely identify the prompt. This is useful when you want to track the responses to the prompts and match them back to the original prompts + * This is not strictly required, but is often useful to have * `parameters`: the parameter settings / generation config for the query (given as a dictionary) * This is a dictionary that contains the parameters for the query. The parameters are specific to the model and the API being used. For example, for the Gemini API (`"api": "gemini"`), some parameters to configure are {`temperature`, `max_output_tokens`, `top_p`, `top_k`} etc. which are used to control the generation of the response. For the OpenAI API (`"api": "openai"`), some of these parameters are named differently for instance the maximum output tokens is set using the `max_tokens` parameter and `top_k` is not available to set. For Ollama (`"api": "ollama"`), the parameters are different again, e.g. 
the maximum number of tokens to predict is set using `num_predict` * See the API documentation for the specific API for the list of parameters that can be set and their default values @@ -23,3 +28,18 @@ In addition, there are other optional keys that can be included in the `prompt_d * Note that you can use parallel processing without using the "group" key, but using this key allows you to have full control in order group the prompts in a way that makes sense for your use case. See the [specifying rate limits documentation](rate_limits.md) for more details on parallel processing Lastly, there are other optional keys that are only available for certain APIs/models. For example, for the Gemini API, you can have a `multimedia` key which is a list of dictionaries defining the multimedia files (e.g. images/videos) to be used in the prompt to a multimodal LLM. For these, see the documentation for the specific API/model for more details. + +## CSV input + +For using CSV inputs, the `prompt_dict`s are defined as rows in the CSV file. The CSV file should have a header row with the keys corresponding to the keys above with the exception of the `parameters` key. The parameters (the keys in the dictionary) should have their own columns in the CSV file _prepended with a "parameters-" prefix_. For example, if you have a parameter `temperature` in the `parameters` dictionary, you should have a column named `parameters-temperature` in the CSV file. The values for the parameters should be in the corresponding columns. + +For example, the two jsonl and csv file inputs are equivalent: + +```json +{"id": "id-0", "prompt": "What is the capital of France?", "api": "openai", "model_name": "gpt-3.5-turbo", "parameters": {"temperature": 0.5, "max_tokens": 100}} +``` + +```csv +id,prompt,api,model_name,parameters-temperature,parameters-max_tokens +id-0,What is the capital of France?,openai,gpt-3.5-turbo,0.5,100 +``` diff --git a/pyproject.toml b/pyproject.toml index a652d301..8e380dab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "prompto" -version = "0.1.3" +version = "0.2.0" description = "Library for asynchronous querying of LLM API endpoints and logging progress" authors = [ "rchan ",
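
As a closing illustration of the `parameters-` column convention documented in `docs/experiment_file.md` above, here is a minimal sketch (assuming only pandas is installed; this is not the library's exact code, which lives in `Experiment._read_input_file` and also keeps the original `parameters-` columns alongside the nested dictionary) of how prefixed CSV columns collapse into a nested `parameters` dictionary:

```python
import io

import pandas as pd

# the same csv example as in docs/experiment_file.md
csv_text = (
    "id,prompt,api,model_name,parameters-temperature,parameters-max_tokens\n"
    "id-0,What is the capital of France?,openai,gpt-3.5-turbo,0.5,100\n"
)

df = pd.read_csv(io.StringIO(csv_text))
param_cols = [col for col in df.columns if col.startswith("parameters-")]

records = []
for _, row in df.iterrows():
    # keep the non-parameter columns as-is
    record = {col: row[col] for col in df.columns if col not in param_cols}
    # collapse the "parameters-" columns into one nested dictionary,
    # skipping any values left blank in the csv (read in as NaN)
    record["parameters"] = {
        col.removeprefix("parameters-"): row[col]
        for col in param_cols
        if not pd.isna(row[col])
    }
    records.append(record)

# each record now looks like a jsonl prompt_dict with a nested "parameters" key,
# e.g. {"temperature": 0.5, "max_tokens": 100} for the row above
print(records)
```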