From 7b47a37ec92d32f2f3f4ef221512d3df8e7ec7fa Mon Sep 17 00:00:00 2001 From: Saurabh Misra Date: Mon, 14 Jul 2025 22:18:48 -0700 Subject: [PATCH 01/21] WIP refiner --- codeflash/api/aiservice.py | 76 +++++++++ codeflash/models/models.py | 3 +- codeflash/optimization/function_optimizer.py | 169 +++++++++++++------ codeflash/result/critic.py | 5 +- 4 files changed, 204 insertions(+), 49 deletions(-) diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py index a4172afb..be6f1623 100644 --- a/codeflash/api/aiservice.py +++ b/codeflash/api/aiservice.py @@ -7,6 +7,7 @@ from typing import TYPE_CHECKING, Any import requests +from pydantic.dataclasses import dataclass from pydantic.json import pydantic_encoder from codeflash.cli_cmds.console import console, logger @@ -23,6 +24,18 @@ from codeflash.models.ExperimentMetadata import ExperimentMetadata +@dataclass(frozen=True) +class AIServiceRefinerRequest: + original_source_code: str + original_read_only_dependency_code: str + optimized_source_code: str + optimized_explanation: str + trace_id: str + original_line_profiler_results: str + optimized_line_profiler_results: str + experiment_metadata: ExperimentMetadata | None = None + + class AiServiceClient: def __init__(self) -> None: self.base_url = self.get_aiservice_base_url() @@ -219,6 +232,69 @@ def optimize_python_code_line_profiler( # noqa: D417 console.rule() return [] + def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]) -> list[OptimizedCandidate]: + payload = [ + { + "original_source_code": opt.original_source_code, + "original_read_only_dependency_code": opt.original_read_only_dependency_code, + "original_line_profiler_results": opt.original_line_profiler_results, + "optimized_source_code": opt.optimized_source_code, + "optimized_explanation": opt.optimized_explanation, + "optimized_line_profiler_results": opt.optimized_line_profiler_results, + "trace_id": opt.trace_id, + "python_version": platform.python_version(), + "experiment_metadata": opt.experiment_metadata, + "codeflash_version": codeflash_version, + "lsp_mode": is_LSP_enabled(), + } + for opt in request + ] + """Optimize the given python code for performance by making a request to the Django endpoint. + + Parameters + ---------- + - source_code (str): The python code to optimize. + - dependency_code (str): The dependency code used as read-only context for the optimization + - trace_id (str): Trace id of optimization run + - num_candidates (int): Number of optimization variants to generate. Default is 10. + - experiment_metadata (Optional[ExperimentalMetadata, None]): Any available experiment metadata for this optimization + + Returns + ------- + - List[OptimizationCandidate]: A list of Optimization Candidates. + + """ + + logger.info(f"Refining {len(request)} optimizations…") + console.rule() + try: + response = self.make_ai_service_request("/optimize-refinement", payload=payload, timeout=600) + except requests.exceptions.RequestException as e: + logger.exception(f"Error generating optimization refinements: {e}") + ph("cli-optimize-error-caught", {"error": str(e)}) + return [] + + if response.status_code == 200: + optimizations_json = response.json()["optimizations"] + logger.info(f"Generated {len(optimizations_json)} candidate optimizations.") + console.rule() + return [ + OptimizedCandidate( + source_code=opt["source_code"], + explanation=opt["explanation"], + optimization_id=opt["optimization_id"], + ) + for opt in optimizations_json + ] + try: + error = response.json()["error"] + except Exception: + error = response.text + logger.error(f"Error generating optimized candidates: {response.status_code} - {error}") + ph("cli-optimize-error-response", {"response_status_code": response.status_code, "error": error}) + console.rule() + return [] + def log_results( # noqa: D417 self, function_trace_id: str, diff --git a/codeflash/models/models.py b/codeflash/models/models.py index e96d1242..c29b7cc6 100644 --- a/codeflash/models/models.py +++ b/codeflash/models/models.py @@ -75,12 +75,13 @@ def __hash__(self) -> int: class BestOptimization(BaseModel): candidate: OptimizedCandidate - helper_functions: list[FunctionSource] + code_context: CodeOptimizationContext runtime: int replay_performance_gain: Optional[dict[BenchmarkKey, float]] = None winning_behavior_test_results: TestResults winning_benchmarking_test_results: TestResults winning_replay_benchmarking_test_results: Optional[TestResults] = None + line_profiler_test_results: dict @dataclass(frozen=True) diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index ef7b215b..74309085 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -18,7 +18,7 @@ from rich.syntax import Syntax from rich.tree import Tree -from codeflash.api.aiservice import AiServiceClient, LocalAiServiceClient +from codeflash.api.aiservice import AiServiceClient, AIServiceRefinerRequest, LocalAiServiceClient from codeflash.api.cfapi import add_code_context_hash, mark_optimization_success from codeflash.benchmarking.utils import process_benchmark_data from codeflash.cli_cmds.console import code_print, console, logger, progress_bar @@ -146,6 +146,7 @@ def __init__( self.generate_and_instrument_tests_results: ( tuple[GeneratedTestsList, dict[str, set[FunctionCalledInTest]], OptimizationSet] | None ) = None + self.valid_optimizations: list[BestOptimization] = list() # TODO: Figure out the dataclass type for this def can_be_optimized(self) -> Result[tuple[bool, CodeOptimizationContext, dict[Path, str]], str]: should_run_experiment = self.experiment_id is not None @@ -390,8 +391,11 @@ def determine_best_candidate( candidate_index = 0 original_len = len(candidates) while candidates: - done = True if future_line_profile_results is None else future_line_profile_results.done() - if done and (future_line_profile_results is not None): + candidate_index += 1 + line_profiler_done = ( + True if future_line_profile_results is None else future_line_profile_results.done() + ) + if line_profiler_done and (future_line_profile_results is not None): line_profile_results = future_line_profile_results.result() candidates.extend(line_profile_results) original_len += len(line_profile_results) @@ -400,7 +404,6 @@ def determine_best_candidate( ) future_line_profile_results = None candidate = candidates.popleft() - candidate_index += 1 get_run_tmp_file(Path(f"test_return_values_{candidate_index}.bin")).unlink(missing_ok=True) get_run_tmp_file(Path(f"test_return_values_{candidate_index}.sqlite")).unlink(missing_ok=True) logger.info(f"Optimization candidate {candidate_index}/{original_len}:") @@ -451,9 +454,11 @@ def determine_best_candidate( tree = Tree(f"Candidate #{candidate_index} - Runtime Information") benchmark_tree = None if speedup_critic( - candidate_result, original_code_baseline.runtime, best_runtime_until_now + candidate_result, original_code_baseline.runtime, best_runtime_until_now=None ) and quantity_of_tests_critic(candidate_result): - tree.add("This candidate is faster than the previous best candidate. 🚀") + tree.add( + "This candidate is faster than the previous best candidate. 🚀" + ) # TODO: Change this description tree.add(f"Original summed runtime: {humanize_runtime(original_code_baseline.runtime)}") tree.add( f"Best summed runtime: {humanize_runtime(candidate_result.best_test_runtime)} " @@ -462,6 +467,11 @@ def determine_best_candidate( ) tree.add(f"Speedup percentage: {perf_gain * 100:.1f}%") tree.add(f"Speedup ratio: {perf_gain + 1:.3f}X") + line_profile_test_results = self.line_profiler_step( + code_context=code_context, + original_helper_code=original_helper_code, + candidate_index=candidate_index, + ) replay_perf_gain = {} if self.args.benchmark: test_results_by_benchmark = ( @@ -486,13 +496,15 @@ def determine_best_candidate( best_optimization = BestOptimization( candidate=candidate, - helper_functions=code_context.helper_functions, + code_context=code_context, runtime=best_test_runtime, + line_profiler_test_results=line_profile_test_results, winning_behavior_test_results=candidate_result.behavior_test_results, replay_performance_gain=replay_perf_gain if self.args.benchmark else None, winning_benchmarking_test_results=candidate_result.benchmarking_test_results, winning_replay_benchmarking_test_results=candidate_result.benchmarking_test_results, ) + self.valid_optimizations.append(best_optimization) best_runtime_until_now = best_test_runtime else: tree.add( @@ -510,8 +522,9 @@ def determine_best_candidate( self.write_code_and_helpers( self.function_to_optimize_source_code, original_helper_code, self.function_to_optimize.file_path ) + if (not len(candidates)) and ( - not done + not line_profiler_done ): # all original candidates processed but lp results haven't been processed concurrent.futures.wait([future_line_profile_results]) line_profile_results = future_line_profile_results.result() @@ -521,6 +534,25 @@ def determine_best_candidate( f"Added results from line profiler to candidates, total candidates now: {original_len}" ) future_line_profile_results = None + + if len(candidates) == 0 and len(self.valid_optimizations) > 0: + # TODO: Instead of doing it all at once at the end, do it one by one as the optimizations + # are found. This way we can hide the time waiting for the LLM results. + self.refine_optimizations( + valid_optimizations=self.valid_optimizations, + original_code_baseline=original_code_baseline, + code_context=code_context, + trace_id=self.function_trace_id[:-4] + exp_type, + experiment_metadata=ExperimentMetadata( + id=self.experiment_id, group="control" if exp_type == "EXP0" else "experiment" + ) + if self.experiment_id + else self.function_trace_id, + ai_service_client=ai_service_client, + executor=executor, + ) + + print("hi") except KeyboardInterrupt as e: self.write_code_and_helpers( self.function_to_optimize_source_code, original_helper_code, self.function_to_optimize.file_path @@ -537,6 +569,36 @@ def determine_best_candidate( ) return best_optimization + def refine_optimizations( + self, + valid_optimizations: list[BestOptimization], + original_code_baseline: OriginalCodeBaseline, + code_context: CodeOptimizationContext, + trace_id: str, + experiment_metadata: ExperimentMetadata, + ai_service_client: AiServiceClient, + executor: concurrent.futures.ThreadPoolExecutor, + ): + request = [ + AIServiceRefinerRequest( + original_source_code=code_context.read_writable_code, + original_read_only_dependency_code=code_context.read_only_context_code, + optimized_source_code=opt.candidate.source_code, + optimized_explanation=opt.candidate.explanation, + trace_id=trace_id, + original_line_profiler_results=original_code_baseline.line_profile_results["str_out"], + optimized_line_profiler_results=opt.line_profiler_test_results["str_out"], + experiment_metadata=experiment_metadata, + ) + for opt in valid_optimizations + ] + future_line_profile_results = executor.submit( + ai_service_client.optimize_python_code_refinement, request=request + ) + concurrent.futures.wait([future_line_profile_results]) + line_profile_results = future_line_profile_results.result() + print("hi") + def log_successful_optimization( self, explanation: Explanation, generated_tests: GeneratedTestsList, exp_type: str ) -> None: @@ -1074,16 +1136,8 @@ def establish_original_code_baseline( assert (test_framework := self.args.test_framework) in {"pytest", "unittest"} # noqa: RUF018 success = True - test_env = os.environ.copy() - test_env["CODEFLASH_TEST_ITERATION"] = "0" - test_env["CODEFLASH_TRACER_DISABLE"] = "1" - test_env["CODEFLASH_LOOP_INDEX"] = "0" - if "PYTHONPATH" not in test_env: - test_env["PYTHONPATH"] = str(self.args.project_root) - else: - test_env["PYTHONPATH"] += os.pathsep + str(self.args.project_root) + test_env = self.get_test_env(codeflash_loop_index=0, codeflash_test_iteration=0, codeflash_tracer_disable=1) - coverage_results = None # Instrument codeflash capture try: instrument_codeflash_capture( @@ -1112,28 +1166,10 @@ def establish_original_code_baseline( if not coverage_critic(coverage_results, self.args.test_framework): return Failure("The threshold for test coverage was not met.") if test_framework == "pytest": - try: - line_profiler_output_file = add_decorator_imports(self.function_to_optimize, code_context) - line_profile_results, _ = self.run_and_parse_tests( - testing_type=TestingMode.LINE_PROFILE, - test_env=test_env, - test_files=self.test_files, - optimization_iteration=0, - testing_time=TOTAL_LOOPING_TIME, - enable_coverage=False, - code_context=code_context, - line_profiler_output_file=line_profiler_output_file, - ) - finally: - # Remove codeflash capture - self.write_code_and_helpers( - self.function_to_optimize_source_code, original_helper_code, self.function_to_optimize.file_path - ) - if line_profile_results["str_out"] == "": - logger.warning( - f"Couldn't run line profiler for original function {self.function_to_optimize.function_name}" - ) - console.rule() + line_profile_results = self.line_profiler_step( + code_context=code_context, original_helper_code=original_helper_code, candidate_index=0 + ) + console.rule() benchmarking_results, _ = self.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, test_env=test_env, @@ -1229,14 +1265,11 @@ def run_optimized_candidate( assert (test_framework := self.args.test_framework) in {"pytest", "unittest"} # noqa: RUF018 with progress_bar("Testing optimization candidate"): - test_env = os.environ.copy() - test_env["CODEFLASH_LOOP_INDEX"] = "0" - test_env["CODEFLASH_TEST_ITERATION"] = str(optimization_candidate_index) - test_env["CODEFLASH_TRACER_DISABLE"] = "1" - if "PYTHONPATH" not in test_env: - test_env["PYTHONPATH"] = str(self.project_root) - else: - test_env["PYTHONPATH"] += os.pathsep + str(self.project_root) + test_env = self.get_test_env( + codeflash_loop_index=0, + codeflash_test_iteration=optimization_candidate_index, + codeflash_tracer_disable=1, + ) get_run_tmp_file(Path(f"test_return_values_{optimization_candidate_index}.sqlite")).unlink(missing_ok=True) # Instrument codeflash capture @@ -1470,3 +1503,45 @@ def cleanup_generated_files(self) -> None: paths_to_cleanup.append(test_file.benchmarking_file_path) cleanup_paths(paths_to_cleanup) + + def get_test_env( + self, codeflash_loop_index: int, codeflash_test_iteration: int, codeflash_tracer_disable: int = 1 + ) -> dict: + test_env = os.environ.copy() + test_env["CODEFLASH_TEST_ITERATION"] = str(codeflash_test_iteration) + test_env["CODEFLASH_TRACER_DISABLE"] = str(codeflash_tracer_disable) + test_env["CODEFLASH_LOOP_INDEX"] = str(codeflash_loop_index) + if "PYTHONPATH" not in test_env: + test_env["PYTHONPATH"] = str(self.args.project_root) + else: + test_env["PYTHONPATH"] += os.pathsep + str(self.args.project_root) + return test_env + + def line_profiler_step( + self, code_context: CodeOptimizationContext, original_helper_code: dict[Path, str], candidate_index: int + ) -> dict: + try: + test_env = self.get_test_env( + codeflash_loop_index=0, codeflash_test_iteration=candidate_index, codeflash_tracer_disable=1 + ) + line_profiler_output_file = add_decorator_imports(self.function_to_optimize, code_context) + line_profile_results, _ = self.run_and_parse_tests( + testing_type=TestingMode.LINE_PROFILE, + test_env=test_env, + test_files=self.test_files, + optimization_iteration=0, + testing_time=TOTAL_LOOPING_TIME, + enable_coverage=False, + code_context=code_context, + line_profiler_output_file=line_profiler_output_file, + ) + finally: + # Remove codeflash capture + self.write_code_and_helpers( + self.function_to_optimize_source_code, original_helper_code, self.function_to_optimize.file_path + ) + if line_profile_results["str_out"] == "": + logger.warning( + f"Couldn't run line profiler for original function {self.function_to_optimize.function_name}" + ) + return line_profile_results diff --git a/codeflash/result/critic.py b/codeflash/result/critic.py index 32348232..aff79195 100644 --- a/codeflash/result/critic.py +++ b/codeflash/result/critic.py @@ -28,7 +28,7 @@ def performance_gain(*, original_runtime_ns: int, optimized_runtime_ns: int) -> def speedup_critic( candidate_result: OptimizedCandidateResult, original_code_runtime: int, - best_runtime_until_now: int, + best_runtime_until_now: int | None, disable_gh_action_noise: Optional[bool] = None, ) -> bool: """Take in a correct optimized Test Result and decide if the optimization should actually be surfaced to the user. @@ -47,6 +47,9 @@ def speedup_critic( perf_gain = performance_gain( original_runtime_ns=original_code_runtime, optimized_runtime_ns=candidate_result.best_test_runtime ) + if best_runtime_until_now is None: + # collect all optimizations with thi + return bool(perf_gain > noise_floor) return bool(perf_gain > noise_floor and candidate_result.best_test_runtime < best_runtime_until_now) From 9da396b2ff706fb7eacf9c71d8edbfa827c5c458 Mon Sep 17 00:00:00 2001 From: Saurabh Misra Date: Mon, 14 Jul 2025 22:28:20 -0700 Subject: [PATCH 02/21] some fixes --- codeflash/api/aiservice.py | 4 ++-- codeflash/optimization/function_optimizer.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py index be6f1623..cd62a5bb 100644 --- a/codeflash/api/aiservice.py +++ b/codeflash/api/aiservice.py @@ -13,6 +13,7 @@ from codeflash.cli_cmds.console import console, logger from codeflash.code_utils.env_utils import get_codeflash_api_key, is_LSP_enabled from codeflash.code_utils.git_utils import get_last_commit_author_if_pr_exists, get_repo_owner_and_name +from codeflash.models.ExperimentMetadata import ExperimentMetadata from codeflash.models.models import OptimizedCandidate from codeflash.telemetry.posthog_cf import ph from codeflash.version import __version__ as codeflash_version @@ -21,7 +22,6 @@ from pathlib import Path from codeflash.discovery.functions_to_optimize import FunctionToOptimize - from codeflash.models.ExperimentMetadata import ExperimentMetadata @dataclass(frozen=True) @@ -33,7 +33,7 @@ class AIServiceRefinerRequest: trace_id: str original_line_profiler_results: str optimized_line_profiler_results: str - experiment_metadata: ExperimentMetadata | None = None + experiment_metadata: ExperimentMetadata | None class AiServiceClient: diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 74309085..3ea85ef9 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -547,7 +547,7 @@ def determine_best_candidate( id=self.experiment_id, group="control" if exp_type == "EXP0" else "experiment" ) if self.experiment_id - else self.function_trace_id, + else None, ai_service_client=ai_service_client, executor=executor, ) @@ -575,7 +575,7 @@ def refine_optimizations( original_code_baseline: OriginalCodeBaseline, code_context: CodeOptimizationContext, trace_id: str, - experiment_metadata: ExperimentMetadata, + experiment_metadata: ExperimentMetadata | None, ai_service_client: AiServiceClient, executor: concurrent.futures.ThreadPoolExecutor, ): From 9627f73078cc0296b95c3ae1a145cfbad7641a84 Mon Sep 17 00:00:00 2001 From: aseembits93 Date: Mon, 14 Jul 2025 22:35:15 -0700 Subject: [PATCH 03/21] change url --- codeflash/api/aiservice.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py index cd62a5bb..99bc81a0 100644 --- a/codeflash/api/aiservice.py +++ b/codeflash/api/aiservice.py @@ -268,7 +268,7 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest] logger.info(f"Refining {len(request)} optimizations…") console.rule() try: - response = self.make_ai_service_request("/optimize-refinement", payload=payload, timeout=600) + response = self.make_ai_service_request("/refinement", payload=payload, timeout=600) except requests.exceptions.RequestException as e: logger.exception(f"Error generating optimization refinements: {e}") ph("cli-optimize-error-caught", {"error": str(e)}) From 9826b00a9ae2a6a5eda6255b4de979d359261ff0 Mon Sep 17 00:00:00 2001 From: Saurabh Misra Date: Mon, 14 Jul 2025 23:08:20 -0700 Subject: [PATCH 04/21] fixes --- codeflash/api/aiservice.py | 15 ++++----------- codeflash/optimization/function_optimizer.py | 14 ++++++-------- 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py index 99bc81a0..d3e858b6 100644 --- a/codeflash/api/aiservice.py +++ b/codeflash/api/aiservice.py @@ -232,7 +232,7 @@ def optimize_python_code_line_profiler( # noqa: D417 console.rule() return [] - def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]) -> list[OptimizedCandidate]: + def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]) -> list[str]: payload = [ { "original_source_code": opt.original_source_code, @@ -275,17 +275,10 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest] return [] if response.status_code == 200: - optimizations_json = response.json()["optimizations"] - logger.info(f"Generated {len(optimizations_json)} candidate optimizations.") + refined_optimizations = response.json()["result"] + logger.info(f"Generated {len(refined_optimizations)} candidate refinements.") console.rule() - return [ - OptimizedCandidate( - source_code=opt["source_code"], - explanation=opt["explanation"], - optimization_id=opt["optimization_id"], - ) - for opt in optimizations_json - ] + return refined_optimizations try: error = response.json()["error"] except Exception: diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 3ea85ef9..9e4415f7 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -538,7 +538,7 @@ def determine_best_candidate( if len(candidates) == 0 and len(self.valid_optimizations) > 0: # TODO: Instead of doing it all at once at the end, do it one by one as the optimizations # are found. This way we can hide the time waiting for the LLM results. - self.refine_optimizations( + refinement_diffs = self.refine_optimizations( valid_optimizations=self.valid_optimizations, original_code_baseline=original_code_baseline, code_context=code_context, @@ -578,7 +578,7 @@ def refine_optimizations( experiment_metadata: ExperimentMetadata | None, ai_service_client: AiServiceClient, executor: concurrent.futures.ThreadPoolExecutor, - ): + ) -> list[str]: request = [ AIServiceRefinerRequest( original_source_code=code_context.read_writable_code, @@ -592,12 +592,10 @@ def refine_optimizations( ) for opt in valid_optimizations ] - future_line_profile_results = executor.submit( - ai_service_client.optimize_python_code_refinement, request=request - ) - concurrent.futures.wait([future_line_profile_results]) - line_profile_results = future_line_profile_results.result() - print("hi") + future_refinement_results = executor.submit(ai_service_client.optimize_python_code_refinement, request=request) + concurrent.futures.wait([future_refinement_results]) + refinement_results = future_refinement_results.result() + return refinement_results def log_successful_optimization( self, explanation: Explanation, generated_tests: GeneratedTestsList, exp_type: str From 50cf370c35c11e3db5ad587b051e274ac885314b Mon Sep 17 00:00:00 2001 From: aseembits93 Date: Tue, 15 Jul 2025 20:36:44 -0700 Subject: [PATCH 05/21] diff format not working yet --- codeflash/optimization/function_optimizer.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 9e4415f7..19b60ea1 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -370,6 +370,7 @@ def determine_best_candidate( ) console.rule() candidates = deque(candidates) + refinement_done = False # Start a new thread for AI service request, start loop in main thread # check if aiservice request is complete, when it is complete, append result to the candidates list with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor: @@ -535,7 +536,7 @@ def determine_best_candidate( ) future_line_profile_results = None - if len(candidates) == 0 and len(self.valid_optimizations) > 0: + if len(candidates) == 0 and len(self.valid_optimizations) > 0 and not refinement_done: # TODO: Instead of doing it all at once at the end, do it one by one as the optimizations # are found. This way we can hide the time waiting for the LLM results. refinement_diffs = self.refine_optimizations( @@ -551,8 +552,12 @@ def determine_best_candidate( ai_service_client=ai_service_client, executor=executor, ) - - print("hi") + more_opt_candidates = [OptimizedCandidate(source_code=refinement_diffs[i], explanation=self.valid_optimizations[i].candidate.explanation, optimization_id=self.valid_optimizations[i].candidate.optimization_id) for i in range(len(refinement_diffs))] + # we no longer need to apply diffs since we are generating the entire code again + candidates.extend(more_opt_candidates) + print("added candidates from refinement") + original_len += len(more_opt_candidates) + refinement_done = True except KeyboardInterrupt as e: self.write_code_and_helpers( self.function_to_optimize_source_code, original_helper_code, self.function_to_optimize.file_path @@ -560,6 +565,7 @@ def determine_best_candidate( logger.exception(f"Optimization interrupted: {e}") raise + #need to figure out best candidate here before we return best_optimization ai_service_client.log_results( function_trace_id=self.function_trace_id[:-4] + exp_type if self.experiment_id else self.function_trace_id, speedup_ratio=speedup_ratios, From 58e44d32d6b01ba4763afaa2e03208abb9a328bd Mon Sep 17 00:00:00 2001 From: aseembits93 Date: Tue, 15 Jul 2025 20:50:19 -0700 Subject: [PATCH 06/21] get some heuristic working for best optimization --- codeflash/optimization/function_optimizer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 19b60ea1..7cecad1b 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -357,6 +357,8 @@ def determine_best_candidate( file_path_to_helper_classes: dict[Path, set[str]], exp_type: str, ) -> BestOptimization | None: + #TODO remove + from codeflash.models.models import OptimizedCandidate best_optimization: BestOptimization | None = None best_runtime_until_now = original_code_baseline.runtime From 77ed5c8ec27f93425ce6428ba78187f5cab5fb98 Mon Sep 17 00:00:00 2001 From: aseembits93 Date: Wed, 16 Jul 2025 16:01:00 -0700 Subject: [PATCH 07/21] working dirty implementation of ranked choice voting for finding best optimization --- codeflash/optimization/function_optimizer.py | 55 +++++++++++++++++++- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 7cecad1b..b513c511 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -2,6 +2,7 @@ import ast import concurrent.futures +import difflib import os import random import subprocess @@ -567,7 +568,57 @@ def determine_best_candidate( logger.exception(f"Optimization interrupted: {e}") raise - #need to figure out best candidate here before we return best_optimization + def diff_length(a: str, b: str) -> int: + """ + Compute the length (in characters) of the unified diff between two strings. + + Parameters: + a (str): Original string. + b (str): Modified string. + + Returns: + int: Total number of characters in the diff. + """ + # Split input strings into lines for line-by-line diff + a_lines = a.splitlines(keepends=True) + b_lines = b.splitlines(keepends=True) + + # Compute unified diff + diff_lines = list(difflib.unified_diff(a_lines, b_lines, lineterm="")) + + # Join all lines with newline to calculate total diff length + diff_text = "\n".join(diff_lines) + + return len(diff_text) + + def create_rank_dictionary_compact(int_array: list[int]) -> dict[int, int]: + """ + Creates a dictionary from a list of ints, mapping the original index to its rank. + This version uses a more compact, "Pythonic" implementation. + + Args: + int_array: A list of integers. + + Returns: + A dictionary where keys are original indices and values are the + rank of the element in ascending order. + """ + # Sort the indices of the array based on their corresponding values + sorted_indices = sorted(range(len(int_array)), key=lambda i: int_array[i]) + + # Create a dictionary mapping the original index to its rank (its position in the sorted list) + return {original_index: rank for rank, original_index in enumerate(sorted_indices)} + + #need to figure out the best candidate here before we return best_optimization + diff_lens_list = [] + runtimes_list = [] + for valid_opt in self.valid_optimizations: + diff_lens_list.append(diff_length(valid_opt.candidate.source_code, code_context.read_writable_code)) + runtimes_list.append(valid_opt.runtime) + diff_lens_ranking = create_rank_dictionary_compact(diff_lens_list) + runtimes_ranking = create_rank_dictionary_compact(runtimes_list) + overall_ranking = {key:diff_lens_ranking[key] + runtimes_ranking[key] for key in diff_lens_ranking.keys()} + min_key = min(overall_ranking, key=overall_ranking.get) ai_service_client.log_results( function_trace_id=self.function_trace_id[:-4] + exp_type if self.experiment_id else self.function_trace_id, speedup_ratio=speedup_ratios, @@ -575,7 +626,7 @@ def determine_best_candidate( optimized_runtime=optimized_runtimes, is_correct=is_correct, ) - return best_optimization + return self.valid_optimizations[min_key] def refine_optimizations( self, From 5be61da086ffc44471ff84e13650979d4ee46b96 Mon Sep 17 00:00:00 2001 From: Saurabh Misra Date: Wed, 16 Jul 2025 18:32:25 -0700 Subject: [PATCH 08/21] First working version of the refiner Signed-off-by: Saurabh Misra --- codeflash/api/aiservice.py | 6 ++++++ codeflash/optimization/function_optimizer.py | 17 ++++++++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py index d3e858b6..b13fe7d8 100644 --- a/codeflash/api/aiservice.py +++ b/codeflash/api/aiservice.py @@ -28,8 +28,11 @@ class AIServiceRefinerRequest: original_source_code: str original_read_only_dependency_code: str + original_code_runtime: str optimized_source_code: str optimized_explanation: str + optimized_code_runtime: str + speedup: str trace_id: str original_line_profiler_results: str optimized_line_profiler_results: str @@ -238,9 +241,12 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest] "original_source_code": opt.original_source_code, "original_read_only_dependency_code": opt.original_read_only_dependency_code, "original_line_profiler_results": opt.original_line_profiler_results, + "original_code_runtime": opt.original_code_runtime, "optimized_source_code": opt.optimized_source_code, "optimized_explanation": opt.optimized_explanation, "optimized_line_profiler_results": opt.optimized_line_profiler_results, + "optimized_code_runtime": opt.optimized_code_runtime, + "speedup": opt.speedup, "trace_id": opt.trace_id, "python_version": platform.python_version(), "experiment_metadata": opt.experiment_metadata, diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 7cecad1b..84a97e44 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -357,8 +357,9 @@ def determine_best_candidate( file_path_to_helper_classes: dict[Path, set[str]], exp_type: str, ) -> BestOptimization | None: - #TODO remove + # TODO remove from codeflash.models.models import OptimizedCandidate + best_optimization: BestOptimization | None = None best_runtime_until_now = original_code_baseline.runtime @@ -554,7 +555,14 @@ def determine_best_candidate( ai_service_client=ai_service_client, executor=executor, ) - more_opt_candidates = [OptimizedCandidate(source_code=refinement_diffs[i], explanation=self.valid_optimizations[i].candidate.explanation, optimization_id=self.valid_optimizations[i].candidate.optimization_id) for i in range(len(refinement_diffs))] + more_opt_candidates = [ + OptimizedCandidate( + source_code=refinement_diffs[i], + explanation=self.valid_optimizations[i].candidate.explanation, + optimization_id=self.valid_optimizations[i].candidate.optimization_id, + ) + for i in range(len(refinement_diffs)) + ] # we no longer need to apply diffs since we are generating the entire code again candidates.extend(more_opt_candidates) print("added candidates from refinement") @@ -567,7 +575,7 @@ def determine_best_candidate( logger.exception(f"Optimization interrupted: {e}") raise - #need to figure out best candidate here before we return best_optimization + # need to figure out best candidate here before we return best_optimization ai_service_client.log_results( function_trace_id=self.function_trace_id[:-4] + exp_type if self.experiment_id else self.function_trace_id, speedup_ratio=speedup_ratios, @@ -591,8 +599,11 @@ def refine_optimizations( AIServiceRefinerRequest( original_source_code=code_context.read_writable_code, original_read_only_dependency_code=code_context.read_only_context_code, + original_code_runtime=humanize_runtime(original_code_baseline.runtime), optimized_source_code=opt.candidate.source_code, optimized_explanation=opt.candidate.explanation, + optimized_code_runtime=humanize_runtime(opt.runtime), + speedup=f"{int(performance_gain(original_runtime_ns=original_code_baseline.runtime, optimized_runtime_ns=opt.runtime) * 100)}%", trace_id=trace_id, original_line_profiler_results=original_code_baseline.line_profile_results["str_out"], optimized_line_profiler_results=opt.line_profiler_test_results["str_out"], From 65d297165df7d8e34f3f6a05804a16ec8434ff0c Mon Sep 17 00:00:00 2001 From: Saurabh Misra Date: Wed, 16 Jul 2025 23:51:54 -0700 Subject: [PATCH 09/21] add RO context Signed-off-by: Saurabh Misra --- codeflash/api/aiservice.py | 2 +- codeflash/optimization/function_optimizer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py index b13fe7d8..5963f1ec 100644 --- a/codeflash/api/aiservice.py +++ b/codeflash/api/aiservice.py @@ -27,7 +27,7 @@ @dataclass(frozen=True) class AIServiceRefinerRequest: original_source_code: str - original_read_only_dependency_code: str + read_only_dependency_code: str original_code_runtime: str optimized_source_code: str optimized_explanation: str diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index e5695fd6..7f065d4a 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -651,7 +651,7 @@ def refine_optimizations( request = [ AIServiceRefinerRequest( original_source_code=code_context.read_writable_code, - original_read_only_dependency_code=code_context.read_only_context_code, + read_only_dependency_code=code_context.read_only_context_code, original_code_runtime=humanize_runtime(original_code_baseline.runtime), optimized_source_code=opt.candidate.source_code, optimized_explanation=opt.candidate.explanation, From e77da5c78364fcd1146c4c89c13db87cba9a3094 Mon Sep 17 00:00:00 2001 From: aseembits93 Date: Thu, 17 Jul 2025 13:20:01 -0700 Subject: [PATCH 10/21] bugfix --- codeflash/api/aiservice.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py index 5963f1ec..79b85ac8 100644 --- a/codeflash/api/aiservice.py +++ b/codeflash/api/aiservice.py @@ -239,7 +239,7 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest] payload = [ { "original_source_code": opt.original_source_code, - "original_read_only_dependency_code": opt.original_read_only_dependency_code, + "read_only_dependency_code": opt.read_only_dependency_code, "original_line_profiler_results": opt.original_line_profiler_results, "original_code_runtime": opt.original_code_runtime, "optimized_source_code": opt.optimized_source_code, From 19cd5c8882fe7eeef28bd3c0308f5f1a823060f5 Mon Sep 17 00:00:00 2001 From: aseembits93 Date: Thu, 17 Jul 2025 13:56:41 -0700 Subject: [PATCH 11/21] bugfix --- codeflash/optimization/function_optimizer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 7f065d4a..2d797071 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -619,6 +619,8 @@ def create_rank_dictionary_compact(int_array: list[int]) -> dict[int, int]: # Create a dictionary mapping the original index to its rank (its position in the sorted list) return {original_index: rank for rank, original_index in enumerate(sorted_indices)} + if not len(self.valid_optimizations): + return None # need to figure out the best candidate here before we return best_optimization diff_lens_list = [] runtimes_list = [] From 5aab3b8e883f6242e1647e66c5635b8faa4ff9ec Mon Sep 17 00:00:00 2001 From: aseembits93 Date: Thu, 17 Jul 2025 14:50:53 -0700 Subject: [PATCH 12/21] bugfix --- codeflash/optimization/function_optimizer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 2d797071..f738ae6b 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -556,6 +556,7 @@ def determine_best_candidate( ai_service_client=ai_service_client, executor=executor, ) + # filter out empty strings of code more_opt_candidates = [ OptimizedCandidate( source_code=refinement_diffs[i], @@ -563,6 +564,7 @@ def determine_best_candidate( optimization_id=self.valid_optimizations[i].candidate.optimization_id, ) for i in range(len(refinement_diffs)) + if refinement_diffs[i] != "" ] # we no longer need to apply diffs since we are generating the entire code again candidates.extend(more_opt_candidates) From ed6b5b10e2bb96c6ef02efc4ebe3e31df36ca906 Mon Sep 17 00:00:00 2001 From: mohammed Date: Fri, 18 Jul 2025 18:33:39 +0300 Subject: [PATCH 13/21] send tracked refinement optimization data --- codeflash/api/aiservice.py | 23 ++++++++++++---- codeflash/optimization/function_optimizer.py | 29 ++++++++++++-------- 2 files changed, 34 insertions(+), 18 deletions(-) diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py index 79b85ac8..a67ff946 100644 --- a/codeflash/api/aiservice.py +++ b/codeflash/api/aiservice.py @@ -34,6 +34,7 @@ class AIServiceRefinerRequest: optimized_code_runtime: str speedup: str trace_id: str + fto_name: str original_line_profiler_results: str optimized_line_profiler_results: str experiment_metadata: ExperimentMetadata | None @@ -114,11 +115,7 @@ def optimize_python_code( # noqa: D417 """ start_time = time.perf_counter() - try: - git_repo_owner, git_repo_name = get_repo_owner_and_name() - except Exception as e: - logger.warning(f"Could not determine repo owner and name: {e}") - git_repo_owner, git_repo_name = None, None + git_repo_owner, git_repo_name = safe_get_repo_owner_and_name() payload = { "source_code": source_code, @@ -236,6 +233,7 @@ def optimize_python_code_line_profiler( # noqa: D417 return [] def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]) -> list[str]: + git_repo_owner, git_repo_name = safe_get_repo_owner_and_name() payload = [ { "original_source_code": opt.original_source_code, @@ -247,11 +245,15 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest] "optimized_line_profiler_results": opt.optimized_line_profiler_results, "optimized_code_runtime": opt.optimized_code_runtime, "speedup": opt.speedup, - "trace_id": opt.trace_id, "python_version": platform.python_version(), "experiment_metadata": opt.experiment_metadata, "codeflash_version": codeflash_version, "lsp_mode": is_LSP_enabled(), + # needed for tracking the refinement behavior + "trace_id": opt.trace_id, + "function_to_optimize": opt.fto_name, + "repo_owner": git_repo_owner, + "repo_name": git_repo_name, } for opt in request ] @@ -406,3 +408,12 @@ class LocalAiServiceClient(AiServiceClient): def get_aiservice_base_url(self) -> str: """Get the base URL for the local AI service.""" return "http://localhost:8000" + + +def safe_get_repo_owner_and_name() -> tuple[str | None, str | None]: + try: + git_repo_owner, git_repo_name = get_repo_owner_and_name() + except Exception as e: + logger.warning(f"Could not determine repo owner and name: {e}") + git_repo_owner, git_repo_name = None, None + return git_repo_owner, git_repo_name diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index f738ae6b..ad5b369b 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -147,7 +147,9 @@ def __init__( self.generate_and_instrument_tests_results: ( tuple[GeneratedTestsList, dict[str, set[FunctionCalledInTest]], OptimizationSet] | None ) = None - self.valid_optimizations: list[BestOptimization] = list() # TODO: Figure out the dataclass type for this + self.valid_optimizations: list[BestOptimization] = ( + list() # TODO: Figure out the dataclass type for this # noqa: C408 + ) def can_be_optimized(self) -> Result[tuple[bool, CodeOptimizationContext, dict[Path, str]], str]: should_run_experiment = self.experiment_id is not None @@ -362,7 +364,7 @@ def determine_best_candidate( from codeflash.models.models import OptimizedCandidate best_optimization: BestOptimization | None = None - best_runtime_until_now = original_code_baseline.runtime + _best_runtime_until_now = original_code_baseline.runtime speedup_ratios: dict[str, float | None] = {} optimized_runtimes: dict[str, float | None] = {} @@ -510,7 +512,6 @@ def determine_best_candidate( winning_replay_benchmarking_test_results=candidate_result.benchmarking_test_results, ) self.valid_optimizations.append(best_optimization) - best_runtime_until_now = best_test_runtime else: tree.add( f"Summed runtime: {humanize_runtime(best_test_runtime)} " @@ -543,11 +544,14 @@ def determine_best_candidate( if len(candidates) == 0 and len(self.valid_optimizations) > 0 and not refinement_done: # TODO: Instead of doing it all at once at the end, do it one by one as the optimizations # are found. This way we can hide the time waiting for the LLM results. + trace_id = self.function_trace_id + if trace_id.endswith(("EXP0", "EXP1")): + trace_id = trace_id[:-4] + exp_type refinement_diffs = self.refine_optimizations( valid_optimizations=self.valid_optimizations, original_code_baseline=original_code_baseline, code_context=code_context, - trace_id=self.function_trace_id[:-4] + exp_type, + trace_id=trace_id, experiment_metadata=ExperimentMetadata( id=self.experiment_id, group="control" if exp_type == "EXP0" else "experiment" ) @@ -555,6 +559,7 @@ def determine_best_candidate( else None, ai_service_client=ai_service_client, executor=executor, + fto_name=self.function_to_optimize.qualified_name, ) # filter out empty strings of code more_opt_candidates = [ @@ -581,13 +586,11 @@ def determine_best_candidate( def diff_length(a: str, b: str) -> int: """Compute the length (in characters) of the unified diff between two strings. - Parameters - ---------- + Args: a (str): Original string. b (str): Modified string. - Returns - ------- + Returns: int: Total number of characters in the diff. """ @@ -604,7 +607,8 @@ def diff_length(a: str, b: str) -> int: return len(diff_text) def create_rank_dictionary_compact(int_array: list[int]) -> dict[int, int]: - """Creates a dictionary from a list of ints, mapping the original index to its rank. + """Create a dictionary from a list of ints, mapping the original index to its rank. + This version uses a more compact, "Pythonic" implementation. Args: @@ -631,7 +635,7 @@ def create_rank_dictionary_compact(int_array: list[int]) -> dict[int, int]: runtimes_list.append(valid_opt.runtime) diff_lens_ranking = create_rank_dictionary_compact(diff_lens_list) runtimes_ranking = create_rank_dictionary_compact(runtimes_list) - overall_ranking = {key: diff_lens_ranking[key] + runtimes_ranking[key] for key in diff_lens_ranking.keys()} + overall_ranking = {key: diff_lens_ranking[key] + runtimes_ranking[key] for key in diff_lens_ranking.keys()} # noqa: SIM118 min_key = min(overall_ranking, key=overall_ranking.get) ai_service_client.log_results( function_trace_id=self.function_trace_id[:-4] + exp_type if self.experiment_id else self.function_trace_id, @@ -651,6 +655,7 @@ def refine_optimizations( experiment_metadata: ExperimentMetadata | None, ai_service_client: AiServiceClient, executor: concurrent.futures.ThreadPoolExecutor, + fto_name: str, ) -> list[str]: request = [ AIServiceRefinerRequest( @@ -665,13 +670,13 @@ def refine_optimizations( original_line_profiler_results=original_code_baseline.line_profile_results["str_out"], optimized_line_profiler_results=opt.line_profiler_test_results["str_out"], experiment_metadata=experiment_metadata, + fto_name=fto_name, ) for opt in valid_optimizations ] future_refinement_results = executor.submit(ai_service_client.optimize_python_code_refinement, request=request) concurrent.futures.wait([future_refinement_results]) - refinement_results = future_refinement_results.result() - return refinement_results + return future_refinement_results.result() def log_successful_optimization( self, explanation: Explanation, generated_tests: GeneratedTestsList, exp_type: str From 1e7a7cb276b21b5ab8b4d4aa377dfaf1491c43b8 Mon Sep 17 00:00:00 2001 From: aseembits93 Date: Mon, 21 Jul 2025 13:19:47 -0700 Subject: [PATCH 14/21] marker for refinement pr --- codeflash/optimization/function_optimizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index ad5b369b..90b26906 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -565,7 +565,7 @@ def determine_best_candidate( more_opt_candidates = [ OptimizedCandidate( source_code=refinement_diffs[i], - explanation=self.valid_optimizations[i].candidate.explanation, + explanation="REFINEMENT "+self.valid_optimizations[i].candidate.explanation, optimization_id=self.valid_optimizations[i].candidate.optimization_id, ) for i in range(len(refinement_diffs)) From 3eedbd2d00d4a539358e2baed66abb7cdef336d0 Mon Sep 17 00:00:00 2001 From: mohammed Date: Tue, 22 Jul 2025 23:32:07 +0300 Subject: [PATCH 15/21] refi optimization ids and original optimization ids --- codeflash/api/aiservice.py | 11 ++++++--- codeflash/optimization/function_optimizer.py | 25 +++++++++++++------- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py index a67ff946..482fb688 100644 --- a/codeflash/api/aiservice.py +++ b/codeflash/api/aiservice.py @@ -26,6 +26,7 @@ @dataclass(frozen=True) class AIServiceRefinerRequest: + optimization_id: str original_source_code: str read_only_dependency_code: str original_code_runtime: str @@ -232,10 +233,11 @@ def optimize_python_code_line_profiler( # noqa: D417 console.rule() return [] - def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]) -> list[str]: + def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]) -> dict[str, str]: git_repo_owner, git_repo_name = safe_get_repo_owner_and_name() payload = [ { + "optimization_id": opt.optimization_id, "original_source_code": opt.original_source_code, "read_only_dependency_code": opt.read_only_dependency_code, "original_line_profiler_results": opt.original_line_profiler_results, @@ -280,7 +282,7 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest] except requests.exceptions.RequestException as e: logger.exception(f"Error generating optimization refinements: {e}") ph("cli-optimize-error-caught", {"error": str(e)}) - return [] + return {} if response.status_code == 200: refined_optimizations = response.json()["result"] @@ -294,7 +296,7 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest] logger.error(f"Error generating optimized candidates: {response.status_code} - {error}") ph("cli-optimize-error-response", {"response_status_code": response.status_code, "error": error}) console.rule() - return [] + return {} def log_results( # noqa: D417 self, @@ -303,6 +305,7 @@ def log_results( # noqa: D417 original_runtime: float | None, optimized_runtime: dict[str, float | None] | None, is_correct: dict[str, bool] | None, + metadata: dict[str, any] | None, ) -> None: """Log features to the database. @@ -313,6 +316,7 @@ def log_results( # noqa: D417 - original_runtime (Optional[Dict[str, float]]): The original runtime. - optimized_runtime (Optional[Dict[str, float]]): The optimized runtime. - is_correct (Optional[Dict[str, bool]]): Whether the optimized code is correct. + - metadata (Optional[dict[str, any]]): metadata. """ payload = { @@ -322,6 +326,7 @@ def log_results( # noqa: D417 "optimized_runtime": optimized_runtime, "is_correct": is_correct, "codeflash_version": codeflash_version, + "metadata": metadata, } try: self.make_ai_service_request("/log_features", payload=payload, timeout=5) diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index ad5b369b..2aeca81a 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -547,7 +547,8 @@ def determine_best_candidate( trace_id = self.function_trace_id if trace_id.endswith(("EXP0", "EXP1")): trace_id = trace_id[:-4] + exp_type - refinement_diffs = self.refine_optimizations( + # refinement_dict is a dictionary with optimization_id as a key and the refined code as a value + refinement_dict = self.refine_optimizations( valid_optimizations=self.valid_optimizations, original_code_baseline=original_code_baseline, code_context=code_context, @@ -561,15 +562,18 @@ def determine_best_candidate( executor=executor, fto_name=self.function_to_optimize.qualified_name, ) - # filter out empty strings of code + more_opt_candidates = [ OptimizedCandidate( - source_code=refinement_diffs[i], - explanation=self.valid_optimizations[i].candidate.explanation, - optimization_id=self.valid_optimizations[i].candidate.optimization_id, + source_code=code, + explanation=self.valid_optimizations[ + i + ].candidate.explanation, # TODO: handle the new explanation after the refinement + optimization_id=opt_id, ) - for i in range(len(refinement_diffs)) - if refinement_diffs[i] != "" + for i, (opt_id, code) in enumerate(refinement_dict.items()) + # filter out empty strings of code + if code != "" ] # we no longer need to apply diffs since we are generating the entire code again candidates.extend(more_opt_candidates) @@ -637,14 +641,16 @@ def create_rank_dictionary_compact(int_array: list[int]) -> dict[int, int]: runtimes_ranking = create_rank_dictionary_compact(runtimes_list) overall_ranking = {key: diff_lens_ranking[key] + runtimes_ranking[key] for key in diff_lens_ranking.keys()} # noqa: SIM118 min_key = min(overall_ranking, key=overall_ranking.get) + best_optimization = self.valid_optimizations[min_key] ai_service_client.log_results( function_trace_id=self.function_trace_id[:-4] + exp_type if self.experiment_id else self.function_trace_id, speedup_ratio=speedup_ratios, original_runtime=original_code_baseline.runtime, optimized_runtime=optimized_runtimes, is_correct=is_correct, + metadata={"best_optimization_id": best_optimization.candidate.optimization_id}, ) - return self.valid_optimizations[min_key] + return best_optimization def refine_optimizations( self, @@ -656,9 +662,10 @@ def refine_optimizations( ai_service_client: AiServiceClient, executor: concurrent.futures.ThreadPoolExecutor, fto_name: str, - ) -> list[str]: + ) -> dict[str, str]: request = [ AIServiceRefinerRequest( + optimization_id=opt.candidate.optimization_id, original_source_code=code_context.read_writable_code, read_only_dependency_code=code_context.read_only_context_code, original_code_runtime=humanize_runtime(original_code_baseline.runtime), From 42f0ada769d03b9eabe953a2a6900c5f2d598142 Mon Sep 17 00:00:00 2001 From: mohammed Date: Wed, 23 Jul 2025 15:46:23 +0300 Subject: [PATCH 16/21] send the best optimization id only - not the whole metadata object --- codeflash/api/aiservice.py | 6 +++--- codeflash/optimization/function_optimizer.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py index 482fb688..f41cf017 100644 --- a/codeflash/api/aiservice.py +++ b/codeflash/api/aiservice.py @@ -305,7 +305,7 @@ def log_results( # noqa: D417 original_runtime: float | None, optimized_runtime: dict[str, float | None] | None, is_correct: dict[str, bool] | None, - metadata: dict[str, any] | None, + best_optimization_id: str | None, ) -> None: """Log features to the database. @@ -316,7 +316,7 @@ def log_results( # noqa: D417 - original_runtime (Optional[Dict[str, float]]): The original runtime. - optimized_runtime (Optional[Dict[str, float]]): The optimized runtime. - is_correct (Optional[Dict[str, bool]]): Whether the optimized code is correct. - - metadata (Optional[dict[str, any]]): metadata. + - best_optimization_id (Optional[str]): The best optimization id. """ payload = { @@ -326,7 +326,7 @@ def log_results( # noqa: D417 "optimized_runtime": optimized_runtime, "is_correct": is_correct, "codeflash_version": codeflash_version, - "metadata": metadata, + "best_optimization_id": best_optimization_id, } try: self.make_ai_service_request("/log_features", payload=payload, timeout=5) diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 2aeca81a..a770bca3 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -648,7 +648,7 @@ def create_rank_dictionary_compact(int_array: list[int]) -> dict[int, int]: original_runtime=original_code_baseline.runtime, optimized_runtime=optimized_runtimes, is_correct=is_correct, - metadata={"best_optimization_id": best_optimization.candidate.optimization_id}, + best_optimization_id=best_optimization.candidate.optimization_id, ) return best_optimization From e964ca6ff4785a9119a5bdac436487b5d7d2da07 Mon Sep 17 00:00:00 2001 From: aseembits93 Date: Thu, 24 Jul 2025 19:12:00 -0700 Subject: [PATCH 17/21] works now, todo tiebreaking for same ranks --- codeflash/api/aiservice.py | 20 +++-- codeflash/code_utils/code_utils.py | 43 ++++++++++ codeflash/optimization/function_optimizer.py | 82 ++++---------------- codeflash/result/critic.py | 2 +- 4 files changed, 75 insertions(+), 72 deletions(-) diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py index f41cf017..67bfc6f0 100644 --- a/codeflash/api/aiservice.py +++ b/codeflash/api/aiservice.py @@ -233,7 +233,7 @@ def optimize_python_code_line_profiler( # noqa: D417 console.rule() return [] - def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]) -> dict[str, str]: + def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]) -> list[OptimizedCandidate]: git_repo_owner, git_repo_name = safe_get_repo_owner_and_name() payload = [ { @@ -282,13 +282,20 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest] except requests.exceptions.RequestException as e: logger.exception(f"Error generating optimization refinements: {e}") ph("cli-optimize-error-caught", {"error": str(e)}) - return {} + return [] if response.status_code == 200: - refined_optimizations = response.json()["result"] + refined_optimizations = response.json()["refinements"] logger.info(f"Generated {len(refined_optimizations)} candidate refinements.") console.rule() - return refined_optimizations + return [ + OptimizedCandidate( + source_code=opt["source_code"], + explanation=opt["explanation"], + optimization_id=opt["optimization_id"][:-4]+"refi", + ) + for opt in refined_optimizations + ] try: error = response.json()["error"] except Exception: @@ -296,7 +303,7 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest] logger.error(f"Error generating optimized candidates: {response.status_code} - {error}") ph("cli-optimize-error-response", {"response_status_code": response.status_code, "error": error}) console.rule() - return {} + return [] def log_results( # noqa: D417 self, @@ -306,6 +313,7 @@ def log_results( # noqa: D417 optimized_runtime: dict[str, float | None] | None, is_correct: dict[str, bool] | None, best_optimization_id: str | None, + optimized_line_profiler_results: dict[str, str] | None, ) -> None: """Log features to the database. @@ -317,6 +325,7 @@ def log_results( # noqa: D417 - optimized_runtime (Optional[Dict[str, float]]): The optimized runtime. - is_correct (Optional[Dict[str, bool]]): Whether the optimized code is correct. - best_optimization_id (Optional[str]): The best optimization id. + -optimized_line_profiler_results: line_profiler results for every candidate mapped to their optimization_id """ payload = { @@ -327,6 +336,7 @@ def log_results( # noqa: D417 "is_correct": is_correct, "codeflash_version": codeflash_version, "best_optimization_id": best_optimization_id, + "optimized_line_profiler_results": optimized_line_profiler_results } try: self.make_ai_service_request("/log_features", payload=payload, timeout=5) diff --git a/codeflash/code_utils/code_utils.py b/codeflash/code_utils/code_utils.py index 82a5b979..3aad0577 100644 --- a/codeflash/code_utils/code_utils.py +++ b/codeflash/code_utils/code_utils.py @@ -1,6 +1,7 @@ from __future__ import annotations import ast +import difflib import os import re import shutil @@ -18,6 +19,48 @@ ImportErrorPattern = re.compile(r"ModuleNotFoundError.*$", re.MULTILINE) +def diff_length(a: str, b: str) -> int: + """Compute the length (in characters) of the unified diff between two strings. + + Args: + a (str): Original string. + b (str): Modified string. + + Returns: + int: Total number of characters in the diff. + + """ + # Split input strings into lines for line-by-line diff + a_lines = a.splitlines(keepends=True) + b_lines = b.splitlines(keepends=True) + + # Compute unified diff + diff_lines = list(difflib.unified_diff(a_lines, b_lines, lineterm="")) + + # Join all lines with newline to calculate total diff length + diff_text = "\n".join(diff_lines) + + return len(diff_text) + + +def create_rank_dictionary_compact(int_array: list[int]) -> dict[int, int]: + """Create a dictionary from a list of ints, mapping the original index to its rank. + + This version uses a more compact, "Pythonic" implementation. + + Args: + int_array: A list of integers. + + Returns: + A dictionary where keys are original indices and values are the + rank of the element in ascending order. + + """ + # Sort the indices of the array based on their corresponding values + sorted_indices = sorted(range(len(int_array)), key=lambda i: int_array[i]) + + # Create a dictionary mapping the original index to its rank (its position in the sorted list) + return {original_index: rank for rank, original_index in enumerate(sorted_indices)} @contextmanager def custom_addopts() -> None: diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index a770bca3..671c5e18 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -37,6 +37,8 @@ has_any_async_functions, module_name_from_file_path, restore_conftest, + diff_length, + create_rank_dictionary_compact, ) from codeflash.code_utils.config_consts import ( INDIVIDUAL_TESTCASE_TIMEOUT, @@ -369,6 +371,7 @@ def determine_best_candidate( speedup_ratios: dict[str, float | None] = {} optimized_runtimes: dict[str, float | None] = {} is_correct = {} + optimized_line_profiler_results: dict[str, str] = {} logger.info( f"Determining best optimization candidate (out of {len(candidates)}) for " @@ -464,7 +467,7 @@ def determine_best_candidate( candidate_result, original_code_baseline.runtime, best_runtime_until_now=None ) and quantity_of_tests_critic(candidate_result): tree.add( - "This candidate is faster than the previous best candidate. 🚀" + "This candidate is faster than the original code. 🚀" ) # TODO: Change this description tree.add(f"Original summed runtime: {humanize_runtime(original_code_baseline.runtime)}") tree.add( @@ -479,6 +482,7 @@ def determine_best_candidate( original_helper_code=original_helper_code, candidate_index=candidate_index, ) + optimized_line_profiler_results[candidate.optimization_id]=line_profile_test_results['str_out'] replay_perf_gain = {} if self.args.benchmark: test_results_by_benchmark = ( @@ -547,8 +551,8 @@ def determine_best_candidate( trace_id = self.function_trace_id if trace_id.endswith(("EXP0", "EXP1")): trace_id = trace_id[:-4] + exp_type - # refinement_dict is a dictionary with optimization_id as a key and the refined code as a value - refinement_dict = self.refine_optimizations( + # refinement_response is a dataclass with optimization_id, code and explanation + refinement_response = self.refine_optimizations( valid_optimizations=self.valid_optimizations, original_code_baseline=original_code_baseline, code_context=code_context, @@ -562,23 +566,9 @@ def determine_best_candidate( executor=executor, fto_name=self.function_to_optimize.qualified_name, ) - - more_opt_candidates = [ - OptimizedCandidate( - source_code=code, - explanation=self.valid_optimizations[ - i - ].candidate.explanation, # TODO: handle the new explanation after the refinement - optimization_id=opt_id, - ) - for i, (opt_id, code) in enumerate(refinement_dict.items()) - # filter out empty strings of code - if code != "" - ] - # we no longer need to apply diffs since we are generating the entire code again - candidates.extend(more_opt_candidates) - print("added candidates from refinement") - original_len += len(more_opt_candidates) + candidates.extend(refinement_response) + print("Added candidates from refinement") + original_len += len(refinement_response) refinement_done = True except KeyboardInterrupt as e: self.write_code_and_helpers( @@ -587,58 +577,17 @@ def determine_best_candidate( logger.exception(f"Optimization interrupted: {e}") raise - def diff_length(a: str, b: str) -> int: - """Compute the length (in characters) of the unified diff between two strings. - - Args: - a (str): Original string. - b (str): Modified string. - - Returns: - int: Total number of characters in the diff. - - """ - # Split input strings into lines for line-by-line diff - a_lines = a.splitlines(keepends=True) - b_lines = b.splitlines(keepends=True) - - # Compute unified diff - diff_lines = list(difflib.unified_diff(a_lines, b_lines, lineterm="")) - - # Join all lines with newline to calculate total diff length - diff_text = "\n".join(diff_lines) - - return len(diff_text) - - def create_rank_dictionary_compact(int_array: list[int]) -> dict[int, int]: - """Create a dictionary from a list of ints, mapping the original index to its rank. - - This version uses a more compact, "Pythonic" implementation. - - Args: - int_array: A list of integers. - - Returns: - A dictionary where keys are original indices and values are the - rank of the element in ascending order. - - """ - # Sort the indices of the array based on their corresponding values - sorted_indices = sorted(range(len(int_array)), key=lambda i: int_array[i]) - - # Create a dictionary mapping the original index to its rank (its position in the sorted list) - return {original_index: rank for rank, original_index in enumerate(sorted_indices)} - if not len(self.valid_optimizations): return None # need to figure out the best candidate here before we return best_optimization - diff_lens_list = [] + diff_lens_list = [] # character level diff runtimes_list = [] for valid_opt in self.valid_optimizations: - diff_lens_list.append(diff_length(valid_opt.candidate.source_code, code_context.read_writable_code)) + diff_lens_list.append(diff_length(valid_opt.candidate.source_code, code_context.read_writable_code)) #char level diff runtimes_list.append(valid_opt.runtime) diff_lens_ranking = create_rank_dictionary_compact(diff_lens_list) runtimes_ranking = create_rank_dictionary_compact(runtimes_list) + # TODO: better way to resolve conflicts with same min ranking overall_ranking = {key: diff_lens_ranking[key] + runtimes_ranking[key] for key in diff_lens_ranking.keys()} # noqa: SIM118 min_key = min(overall_ranking, key=overall_ranking.get) best_optimization = self.valid_optimizations[min_key] @@ -649,6 +598,7 @@ def create_rank_dictionary_compact(int_array: list[int]) -> dict[int, int]: optimized_runtime=optimized_runtimes, is_correct=is_correct, best_optimization_id=best_optimization.candidate.optimization_id, + optimized_line_profiler_results= optimized_line_profiler_results ) return best_optimization @@ -662,7 +612,7 @@ def refine_optimizations( ai_service_client: AiServiceClient, executor: concurrent.futures.ThreadPoolExecutor, fto_name: str, - ) -> dict[str, str]: + ) -> list[OptimizedCandidate]: request = [ AIServiceRefinerRequest( optimization_id=opt.candidate.optimization_id, @@ -680,7 +630,7 @@ def refine_optimizations( fto_name=fto_name, ) for opt in valid_optimizations - ] + ] # TODO: multiple workers for this? future_refinement_results = executor.submit(ai_service_client.optimize_python_code_refinement, request=request) concurrent.futures.wait([future_refinement_results]) return future_refinement_results.result() diff --git a/codeflash/result/critic.py b/codeflash/result/critic.py index aff79195..fa4a68b8 100644 --- a/codeflash/result/critic.py +++ b/codeflash/result/critic.py @@ -48,7 +48,7 @@ def speedup_critic( original_runtime_ns=original_code_runtime, optimized_runtime_ns=candidate_result.best_test_runtime ) if best_runtime_until_now is None: - # collect all optimizations with thi + # collect all optimizations with this return bool(perf_gain > noise_floor) return bool(perf_gain > noise_floor and candidate_result.best_test_runtime < best_runtime_until_now) From beb1ee0b74922c1b69717aa406b1ebc7cd9074a8 Mon Sep 17 00:00:00 2001 From: Aseem Saxena Date: Fri, 25 Jul 2025 13:09:40 -0700 Subject: [PATCH 18/21] precommit mypy fix --- codeflash/api/aiservice.py | 10 +++++++--- codeflash/code_utils/code_utils.py | 2 ++ codeflash/optimization/function_optimizer.py | 20 +++++++++++--------- 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py index 67bfc6f0..40346b0d 100644 --- a/codeflash/api/aiservice.py +++ b/codeflash/api/aiservice.py @@ -54,7 +54,11 @@ def get_aiservice_base_url(self) -> str: return "https://app.codeflash.ai" def make_ai_service_request( - self, endpoint: str, method: str = "POST", payload: dict[str, Any] | None = None, timeout: float | None = None + self, + endpoint: str, + method: str = "POST", + payload: dict[str, Any] | list[dict[str, Any]] | None = None, + timeout: float | None = None, ) -> requests.Response: """Make an API request to the given endpoint on the AI service. @@ -292,7 +296,7 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest] OptimizedCandidate( source_code=opt["source_code"], explanation=opt["explanation"], - optimization_id=opt["optimization_id"][:-4]+"refi", + optimization_id=opt["optimization_id"][:-4] + "refi", ) for opt in refined_optimizations ] @@ -336,7 +340,7 @@ def log_results( # noqa: D417 "is_correct": is_correct, "codeflash_version": codeflash_version, "best_optimization_id": best_optimization_id, - "optimized_line_profiler_results": optimized_line_profiler_results + "optimized_line_profiler_results": optimized_line_profiler_results, } try: self.make_ai_service_request("/log_features", payload=payload, timeout=5) diff --git a/codeflash/code_utils/code_utils.py b/codeflash/code_utils/code_utils.py index 3aad0577..f9f06a3e 100644 --- a/codeflash/code_utils/code_utils.py +++ b/codeflash/code_utils/code_utils.py @@ -19,6 +19,7 @@ ImportErrorPattern = re.compile(r"ModuleNotFoundError.*$", re.MULTILINE) + def diff_length(a: str, b: str) -> int: """Compute the length (in characters) of the unified diff between two strings. @@ -62,6 +63,7 @@ def create_rank_dictionary_compact(int_array: list[int]) -> dict[int, int]: # Create a dictionary mapping the original index to its rank (its position in the sorted list) return {original_index: rank for rank, original_index in enumerate(sorted_indices)} + @contextmanager def custom_addopts() -> None: pyproject_file = find_pyproject_toml() diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 671c5e18..6878d018 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -2,7 +2,6 @@ import ast import concurrent.futures -import difflib import os import random import subprocess @@ -32,13 +31,13 @@ from codeflash.code_utils.code_utils import ( ImportErrorPattern, cleanup_paths, + create_rank_dictionary_compact, + diff_length, file_name_from_test_module_name, get_run_tmp_file, has_any_async_functions, module_name_from_file_path, restore_conftest, - diff_length, - create_rank_dictionary_compact, ) from codeflash.code_utils.config_consts import ( INDIVIDUAL_TESTCASE_TIMEOUT, @@ -363,7 +362,6 @@ def determine_best_candidate( exp_type: str, ) -> BestOptimization | None: # TODO remove - from codeflash.models.models import OptimizedCandidate best_optimization: BestOptimization | None = None _best_runtime_until_now = original_code_baseline.runtime @@ -482,7 +480,9 @@ def determine_best_candidate( original_helper_code=original_helper_code, candidate_index=candidate_index, ) - optimized_line_profiler_results[candidate.optimization_id]=line_profile_test_results['str_out'] + optimized_line_profiler_results[candidate.optimization_id] = line_profile_test_results[ + "str_out" + ] replay_perf_gain = {} if self.args.benchmark: test_results_by_benchmark = ( @@ -580,10 +580,12 @@ def determine_best_candidate( if not len(self.valid_optimizations): return None # need to figure out the best candidate here before we return best_optimization - diff_lens_list = [] # character level diff + diff_lens_list = [] # character level diff runtimes_list = [] for valid_opt in self.valid_optimizations: - diff_lens_list.append(diff_length(valid_opt.candidate.source_code, code_context.read_writable_code)) #char level diff + diff_lens_list.append( + diff_length(valid_opt.candidate.source_code, code_context.read_writable_code) + ) # char level diff runtimes_list.append(valid_opt.runtime) diff_lens_ranking = create_rank_dictionary_compact(diff_lens_list) runtimes_ranking = create_rank_dictionary_compact(runtimes_list) @@ -598,7 +600,7 @@ def determine_best_candidate( optimized_runtime=optimized_runtimes, is_correct=is_correct, best_optimization_id=best_optimization.candidate.optimization_id, - optimized_line_profiler_results= optimized_line_profiler_results + optimized_line_profiler_results=optimized_line_profiler_results, ) return best_optimization @@ -630,7 +632,7 @@ def refine_optimizations( fto_name=fto_name, ) for opt in valid_optimizations - ] # TODO: multiple workers for this? + ] # TODO: multiple workers for this? future_refinement_results = executor.submit(ai_service_client.optimize_python_code_refinement, request=request) concurrent.futures.wait([future_refinement_results]) return future_refinement_results.result() From c0b85ad160bba49990e5b634389ebc1464f9f23e Mon Sep 17 00:00:00 2001 From: Aseem Saxena Date: Fri, 25 Jul 2025 17:23:46 -0700 Subject: [PATCH 19/21] cleaning up --- codeflash/api/aiservice.py | 3 --- codeflash/models/models.py | 1 + codeflash/optimization/function_optimizer.py | 3 +-- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py index 40346b0d..c5e706ac 100644 --- a/codeflash/api/aiservice.py +++ b/codeflash/api/aiservice.py @@ -316,7 +316,6 @@ def log_results( # noqa: D417 original_runtime: float | None, optimized_runtime: dict[str, float | None] | None, is_correct: dict[str, bool] | None, - best_optimization_id: str | None, optimized_line_profiler_results: dict[str, str] | None, ) -> None: """Log features to the database. @@ -328,7 +327,6 @@ def log_results( # noqa: D417 - original_runtime (Optional[Dict[str, float]]): The original runtime. - optimized_runtime (Optional[Dict[str, float]]): The optimized runtime. - is_correct (Optional[Dict[str, bool]]): Whether the optimized code is correct. - - best_optimization_id (Optional[str]): The best optimization id. -optimized_line_profiler_results: line_profiler results for every candidate mapped to their optimization_id """ @@ -339,7 +337,6 @@ def log_results( # noqa: D417 "optimized_runtime": optimized_runtime, "is_correct": is_correct, "codeflash_version": codeflash_version, - "best_optimization_id": best_optimization_id, "optimized_line_profiler_results": optimized_line_profiler_results, } try: diff --git a/codeflash/models/models.py b/codeflash/models/models.py index c29b7cc6..9b396edb 100644 --- a/codeflash/models/models.py +++ b/codeflash/models/models.py @@ -75,6 +75,7 @@ def __hash__(self) -> int: class BestOptimization(BaseModel): candidate: OptimizedCandidate + helper_functions: list[FunctionSource] code_context: CodeOptimizationContext runtime: int replay_performance_gain: Optional[dict[BenchmarkKey, float]] = None diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 6878d018..bab51604 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -361,8 +361,6 @@ def determine_best_candidate( file_path_to_helper_classes: dict[Path, set[str]], exp_type: str, ) -> BestOptimization | None: - # TODO remove - best_optimization: BestOptimization | None = None _best_runtime_until_now = original_code_baseline.runtime @@ -507,6 +505,7 @@ def determine_best_candidate( best_optimization = BestOptimization( candidate=candidate, + helper_functions=code_context.helper_functions, code_context=code_context, runtime=best_test_runtime, line_profiler_test_results=line_profile_test_results, From ef80323201c565395a4bbb0be752a000a81146c0 Mon Sep 17 00:00:00 2001 From: Aseem Saxena Date: Fri, 25 Jul 2025 17:59:08 -0700 Subject: [PATCH 20/21] further streamlining --- codeflash/api/aiservice.py | 57 +++++++++----------------------------- codeflash/models/models.py | 16 +++++++++++ 2 files changed, 29 insertions(+), 44 deletions(-) diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py index c5e706ac..62d78baf 100644 --- a/codeflash/api/aiservice.py +++ b/codeflash/api/aiservice.py @@ -7,14 +7,13 @@ from typing import TYPE_CHECKING, Any import requests -from pydantic.dataclasses import dataclass from pydantic.json import pydantic_encoder from codeflash.cli_cmds.console import console, logger from codeflash.code_utils.env_utils import get_codeflash_api_key, is_LSP_enabled from codeflash.code_utils.git_utils import get_last_commit_author_if_pr_exists, get_repo_owner_and_name from codeflash.models.ExperimentMetadata import ExperimentMetadata -from codeflash.models.models import OptimizedCandidate +from codeflash.models.models import AIServiceRefinerRequest, OptimizedCandidate from codeflash.telemetry.posthog_cf import ph from codeflash.version import __version__ as codeflash_version @@ -22,23 +21,8 @@ from pathlib import Path from codeflash.discovery.functions_to_optimize import FunctionToOptimize - - -@dataclass(frozen=True) -class AIServiceRefinerRequest: - optimization_id: str - original_source_code: str - read_only_dependency_code: str - original_code_runtime: str - optimized_source_code: str - optimized_explanation: str - optimized_code_runtime: str - speedup: str - trace_id: str - fto_name: str - original_line_profiler_results: str - optimized_line_profiler_results: str - experiment_metadata: ExperimentMetadata | None + from codeflash.models.ExperimentMetadata import ExperimentMetadata + from codeflash.models.models import AIServiceRefinerRequest class AiServiceClient: @@ -238,7 +222,16 @@ def optimize_python_code_line_profiler( # noqa: D417 return [] def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]) -> list[OptimizedCandidate]: - git_repo_owner, git_repo_name = safe_get_repo_owner_and_name() + """Optimize the given python code for performance by making a request to the Django endpoint. + + Args: + request: A list of optimization candidate details for refinement + + Returns: + ------- + - List[OptimizationCandidate]: A list of Optimization Candidates. + + """ payload = [ { "optimization_id": opt.optimization_id, @@ -251,34 +244,10 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest] "optimized_line_profiler_results": opt.optimized_line_profiler_results, "optimized_code_runtime": opt.optimized_code_runtime, "speedup": opt.speedup, - "python_version": platform.python_version(), - "experiment_metadata": opt.experiment_metadata, - "codeflash_version": codeflash_version, - "lsp_mode": is_LSP_enabled(), - # needed for tracking the refinement behavior "trace_id": opt.trace_id, - "function_to_optimize": opt.fto_name, - "repo_owner": git_repo_owner, - "repo_name": git_repo_name, } for opt in request ] - """Optimize the given python code for performance by making a request to the Django endpoint. - - Parameters - ---------- - - source_code (str): The python code to optimize. - - dependency_code (str): The dependency code used as read-only context for the optimization - - trace_id (str): Trace id of optimization run - - num_candidates (int): Number of optimization variants to generate. Default is 10. - - experiment_metadata (Optional[ExperimentalMetadata, None]): Any available experiment metadata for this optimization - - Returns - ------- - - List[OptimizationCandidate]: A list of Optimization Candidates. - - """ - logger.info(f"Refining {len(request)} optimizations…") console.rule() try: diff --git a/codeflash/models/models.py b/codeflash/models/models.py index 9b396edb..369fd51b 100644 --- a/codeflash/models/models.py +++ b/codeflash/models/models.py @@ -27,6 +27,22 @@ from codeflash.code_utils.env_utils import is_end_to_end from codeflash.verification.comparator import comparator + +@dataclass(frozen=True) +class AIServiceRefinerRequest: + optimization_id: str + original_source_code: str + read_only_dependency_code: str + original_code_runtime: str + optimized_source_code: str + optimized_explanation: str + optimized_code_runtime: str + speedup: str + trace_id: str + original_line_profiler_results: str + optimized_line_profiler_results: str + + # If the method spam is in the class Ham, which is at the top level of the module eggs in the package foo, the fully # qualified name of the method is foo.eggs.Ham.spam, its qualified name is Ham.spam, and its name is spam. The full name # of the module is foo.eggs. From 65d766d97f398f787b85e20d90a3ddbaf290cbc9 Mon Sep 17 00:00:00 2001 From: Aseem Saxena Date: Fri, 25 Jul 2025 18:26:35 -0700 Subject: [PATCH 21/21] bugfix --- codeflash/optimization/function_optimizer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index bab51604..8d9059d9 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -598,7 +598,6 @@ def determine_best_candidate( original_runtime=original_code_baseline.runtime, optimized_runtime=optimized_runtimes, is_correct=is_correct, - best_optimization_id=best_optimization.candidate.optimization_id, optimized_line_profiler_results=optimized_line_profiler_results, ) return best_optimization