add total number of runs to logs & small fixes #72

Merged · 3 commits · Jan 18, 2024
Changes from 2 commits
src/parameter_search.ipynb — 31 changes: 17 additions & 14 deletions
@@ -129,7 +129,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"**Check if GPU is available**"
"Check if **GPU** is available"
]
},
{
@@ -165,7 +165,7 @@
"metadata": {},
"outputs": [],
"source": [
"!pip install -q -r ../requirements.txt --disable-pip-version-check && echo \"Success\""
"%pip install -q -r ../requirements.txt --disable-pip-version-check && echo \"Success\""
]
},
{
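Note: context for this change — `%pip` is the IPython magic that installs packages into the environment of the running kernel, while `!pip` shells out and can target a different interpreter than the one the notebook is using, so the swap makes the install step more reliable.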
@@ -320,11 +320,11 @@
"outputs": [],
"source": [
"searchable_index_params = {\n",
" 'chunk_size' : [192, 256, 320],\n",
" 'chunk_overlap' : [64, 128, 192],\n",
" 'chunk_size' : [128, 256, 384],\n",
" 'chunk_overlap' : [16, 64, 128],\n",
"}\n",
"searchable_shared_params = {\n",
" 'embed_model' : [(\"huggingface\",\"all-MiniLM-L6-v2\")]\n",
" 'embed_model' : [(\"huggingface\",\"all-MiniLM-L6-v2\")] # add more as tuples: ,(\"huggingface\", \"BAAI/bge-base-en-v1.5\")\n",
"}\n",
"searchable_eval_params = {\n",
" 'retrieval_top_k' : [2, 4, 8]\n",
@@ -355,7 +355,7 @@
"shared_args = {\n",
" \"base_path\" : \"./data\", # where data goes\n",
" \"embed_api_key\" : {},\n",
" \"weaviate_url\" : f\"http://{get_host_ip()}:8081\" # IP address\n",
" \"weaviate_url\" : f\"http://{get_host_ip()}:8081\"\n",
"}\n",
"index_args = {\n",
" \"n_rows_to_load\" : 1000,\n",
@@ -366,8 +366,8 @@
" \"llm_module_name\": \"langchain.chat_models\",\n",
" \"llm_class_name\": \"ChatOpenAI\",\n",
" \"llm_kwargs\": {\n",
" \"openai_api_key\": \"\" , # TODO: Update with your key or use a different model\n",
" \"model_name\" : \"gpt-3.5-turbo\"\n",
" \"openai_api_key\": \"\" , # TODO: update with your key or use a different model\n",
" \"model_name\" : \"gpt-3.5-turbo\" # choose model, e.g. \"gpt-4\", \"gpt-3.5-turbo\"\n",
" },\n",
" \"evaluation_metrics\" : [\"context_precision\", \"context_relevancy\"]\n",
"}\n",
@@ -400,7 +400,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"from utils import ParameterSearch\n",
@@ -417,6 +419,7 @@
" search_method = search_method,\n",
" target_metric = target_metric,\n",
" evaluation_set_path=evaluation_set_path,\n",
" debug=False,\n",
")\n",
"\n",
"results = mysearch.run(weaviate_client)"
@@ -465,7 +468,7 @@
"metadata": {},
"outputs": [],
"source": [
"!pip install -q \"plotly\" --disable-pip-version-check && echo \"Plotly installed successfully\""
"%pip install -q \"plotly\" --disable-pip-version-check && echo \"Plotly installed successfully\""
]
},
{
@@ -483,8 +486,8 @@
"source": [
"from utils import add_embed_model_numerical_column, show_legend_embed_models\n",
"\n",
"parameter_search_results = add_embed_model_numerical_column(parameter_search_results)\n",
"show_legend_embed_models(parameter_search_results)"
"results = add_embed_model_numerical_column(results)\n",
"show_legend_embed_models(results)"
]
},
{
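Note: this rename lines the plotting cells up with the variable actually produced earlier (`results = mysearch.run(weaviate_client)`); the old `parameter_search_results` name no longer matched that output, so this reads as one of the "small fixes" from the PR title.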
@@ -503,7 +506,7 @@
"import plotly.express as px\n",
"\n",
"dimensions = ['chunk_size', 'chunk_overlap', 'embed_model_numerical', 'retrieval_top_k', 'context_precision']\n",
"fig = px.parallel_coordinates(parameter_search_results, color=\"context_precision\",\n",
"fig = px.parallel_coordinates(results, color=\"context_precision\",\n",
" dimensions=dimensions,\n",
" color_continuous_scale=px.colors.sequential.Bluered)\n",
"fig.show()"
@@ -614,7 +617,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.9.13"
}
},
"nbformat": 4,
src/utils.py — 44 changes: 15 additions & 29 deletions
@@ -173,11 +173,12 @@ def run(self, weaviate_client: weaviate.Client):
while True:
configs = self.create_configs(run_count)

# stop running when out of configs
if configs is None:
break

# create configs
indexing_config, evaluation_config = configs
indexing_config, evaluation_config, n_runs = configs

# create pipeline objects
indexing_pipeline, evaluation_pipeline = self.create_pipelines(
@@ -186,18 +187,20 @@
)

# run indexing pipeline
self.run_indexing_pipeline(run_count, indexing_config, indexing_pipeline)

logger.info(
f"Starting indexing pipeline of RUN {run_count}/{n_runs} with {indexing_config}")
self.runner.run(indexing_pipeline)

check_weaviate_class_exists(
weaviate_client,
indexing_config["weaviate_class"],
)

# run evaluation pipeline
self.run_evaluation_pipeline(
run_count,
evaluation_config,
evaluation_pipeline,
logger.info(
f"Starting evaluation pipeline of run #{run_count} / {n_runs} with {evaluation_config}")
self.runner.run(input=evaluation_pipeline,
extra_volumes=self.extra_volumes,
)

# read metrics from pipeline output
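Note: read together with the deletion at the bottom of this file, the thin run_indexing_pipeline and run_evaluation_pipeline wrappers are inlined into run() here, and their log messages are extended to report the new n_runs total alongside run_count.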
@@ -222,6 +225,7 @@ def create_configs(self, run_count: int):
if self.search_method == "grid_search":
# all possible combinations of parameters
all_combinations = list(cartesian_product(self.searchable_params))
n_runs = len(all_combinations)

# when all combinations have been tried, stop searching
if run_count > len(all_combinations) - 1:
@@ -252,9 +256,8 @@
keys_to_try.append(key)
values_to_try.append(option)
step += 1
variations_to_try = [
{keys_to_try[i]: values_to_try[i]} for i in range(len(keys_to_try))
]
variations_to_try = [{keys_to_try[i]: values_to_try[i]} for i in range(len(keys_to_try))]
n_runs = len(variations_to_try) + 1

# if there are no variations to try, just schedule one run
if len(variations_to_try) == 0:
@@ -315,7 +318,7 @@
"embed_model"
] = indexing_config["embed_model"][1]

return indexing_config, evaluation_config
return indexing_config, evaluation_config, n_runs

def create_pipelines(self, indexing_config, evaluation_config):
# create indexing pipeline
@@ -351,21 +354,4 @@
logger.info("\nEvaluation pipeline parameters:")
logger.info({**self.shared_args, **self.eval_args, **evaluation_config})

return indexing_pipeline, evaluation_pipeline

def run_indexing_pipeline(self, run_count, indexing_config, indexing_pipeline):
logger.info(
f"Starting indexing pipeline of run #{run_count} with {indexing_config}",
)
self.runner.run(indexing_pipeline)

def run_evaluation_pipeline(
self,
run_count,
evaluation_config,
evaluation_pipeline,
):
logger.info(
f"Starting evaluation pipeline of run #{run_count} with {evaluation_config}",
)
self.runner.run(input=evaluation_pipeline, extra_volumes=self.extra_volumes)
return indexing_pipeline, evaluation_pipeline