codeflash-ai
diff --git a/‎codeflash/api/aiservice.py‎
Lines changed: 64 additions & 15 deletions b/‎codeflash/api/aiservice.py‎
Lines changed: 64 additions & 15 deletions
diff --git a/‎codeflash/code_utils/code_utils.py‎
Lines changed: 4 additions & 0 deletions b/‎codeflash/code_utils/code_utils.py‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎codeflash/code_utils/config_consts.py‎
Lines changed: 4 additions & 0 deletions b/‎codeflash/code_utils/config_consts.py‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎codeflash/models/models.py‎
Lines changed: 80 additions & 3 deletions b/‎codeflash/models/models.py‎
Lines changed: 80 additions & 3 deletions
@@ -18,7 +18,12 @@
 from codeflash.code_utils.time_utils import humanize_runtime
 from codeflash.lsp.helpers import is_LSP_enabled
 from codeflash.models.ExperimentMetadata import ExperimentMetadata
-from codeflash.models.models import AIServiceRefinerRequest, CodeStringsMarkdown, OptimizedCandidate
+from codeflash.models.models import (
+    AIServiceRefinerRequest,
+    CodeStringsMarkdown,
+    OptimizedCandidate,
+    OptimizedCandidateSource,
+)
 from codeflash.telemetry.posthog_cf import ph
 from codeflash.version import __version__ as codeflash_version
 
@@ -27,7 +32,7 @@
 
     from codeflash.discovery.functions_to_optimize import FunctionToOptimize
     from codeflash.models.ExperimentMetadata import ExperimentMetadata
-    from codeflash.models.models import AIServiceRefinerRequest
+    from codeflash.models.models import AIServiceCodeRepairRequest, AIServiceRefinerRequest
     from codeflash.result.explanation import Explanation
 
 
@@ -86,15 +91,21 @@ def make_ai_service_request(
         # response.raise_for_status()  # Will raise an HTTPError if the HTTP request returned an unsuccessful status code
         return response
 
-    def _get_valid_candidates(self, optimizations_json: list[dict[str, Any]]) -> list[OptimizedCandidate]:
+    def _get_valid_candidates(
+        self, optimizations_json: list[dict[str, Any]], source: OptimizedCandidateSource
+    ) -> list[OptimizedCandidate]:
+        """Turn the AI service's JSON entries into ``OptimizedCandidate`` objects.
+
+        Each entry's ``source_code`` is parsed with ``CodeStringsMarkdown.parse_markdown_code``;
+        entries that yield no code strings are dropped. Every surviving candidate is tagged with
+        ``source``, and ``parent_id`` is copied from the entry when present (``None`` otherwise).
+        Entries are indexed directly for ``source_code``, ``explanation`` and ``optimization_id``,
+        so a missing key raises ``KeyError``.
+        """
         candidates: list[OptimizedCandidate] = []
         for opt in optimizations_json:
             code = CodeStringsMarkdown.parse_markdown_code(opt["source_code"])
+            # Nothing parsable came back for this entry, so there is no candidate to build.
             if not code.code_strings:
                 continue
             candidates.append(
                 OptimizedCandidate(
-                    source_code=code, explanation=opt["explanation"], optimization_id=opt["optimization_id"]
+                    source_code=code,
+                    explanation=opt["explanation"],
+                    optimization_id=opt["optimization_id"],
+                    source=source,
+                    parent_id=opt.get("parent_id", None),
                 )
             )
         return candidates
@@ -156,7 +167,7 @@ def optimize_python_code(  # noqa: D417
             console.rule()
             end_time = time.perf_counter()
             logger.debug(f"!lsp|Generating possible optimizations took {end_time - start_time:.2f} seconds.")
-            return self._get_valid_candidates(optimizations_json)
+            return self._get_valid_candidates(optimizations_json, OptimizedCandidateSource.OPTIMIZE)
         try:
             error = response.json()["error"]
         except Exception:
@@ -221,7 +232,7 @@ def optimize_python_code_line_profiler(  # noqa: D417
                 f"!lsp|Generated {len(optimizations_json)} candidate optimizations using line profiler information."
             )
             console.rule()
-            return self._get_valid_candidates(optimizations_json)
+            return self._get_valid_candidates(optimizations_json, OptimizedCandidateSource.OPTIMIZE_LP)
         try:
             error = response.json()["error"]
         except Exception:
@@ -270,15 +281,7 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]
         if response.status_code == 200:
             refined_optimizations = response.json()["refinements"]
 
-            refinements = self._get_valid_candidates(refined_optimizations)
-            return [
-                OptimizedCandidate(
-                    source_code=c.source_code,
-                    explanation=c.explanation,
-                    optimization_id=c.optimization_id[:-4] + "refi",
-                )
-                for c in refinements
-            ]
+            return self._get_valid_candidates(refined_optimizations, OptimizedCandidateSource.REFINE)
 
         try:
             error = response.json()["error"]
@@ -289,6 +292,52 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]
         console.rule()
         return []
 
+    def code_repair(self, request: AIServiceCodeRepairRequest) -> OptimizedCandidate | None:
+        """Ask the AI service to repair a candidate whose test results differ from the original code.
+
+        Parameters
+        ----------
+        - request (AIServiceCodeRepairRequest): The candidate to repair: its optimization id, the
+          original and modified source code, the trace id, and the observed test diffs.
+
+        Returns
+        -------
+        - OptimizedCandidate | None: The repaired candidate, tagged with
+          ``OptimizedCandidateSource.REPAIR``. ``None`` when the request fails, the service answers
+          with a non-200 status, the response body is malformed, or it contains no parsable code.
+
+        """
+        console.rule()
+        try:
+            # NOTE(review): test_diffs holds TestDiff dataclass instances; this assumes
+            # make_ai_service_request knows how to serialize them - confirm. A serialization
+            # failure would surface as the TypeError caught below.
+            payload = {
+                "optimization_id": request.optimization_id,
+                "original_source_code": request.original_source_code,
+                "modified_source_code": request.modified_source_code,
+                "trace_id": request.trace_id,
+                "test_diffs": request.test_diffs,
+            }
+            response = self.make_ai_service_request("/code_repair", payload=payload, timeout=120)
+        except (requests.exceptions.RequestException, TypeError) as e:
+            logger.exception(f"Error generating optimization repair: {e}")
+            ph("cli-optimize-error-caught", {"error": str(e)})
+            return None
+
+        if response.status_code == 200:
+            console.rule()
+            try:
+                fixed_optimization = response.json()
+                valid_candidates = self._get_valid_candidates([fixed_optimization], OptimizedCandidateSource.REPAIR)
+            except (ValueError, KeyError, TypeError) as e:
+                # A 200 with an undecodable body or a payload missing the expected keys is a
+                # failed repair, not a reason to abort the caller: report it and return None
+                # like every other failure path of this method.
+                logger.error(f"Code repair returned a malformed response: {e}")
+                return None
+
+            if not valid_candidates:
+                logger.error("Code repair failed to generate a valid candidate.")
+                return None
+
+            return valid_candidates[0]
+
+        try:
+            error = response.json()["error"]
+        except Exception:
+            # Fall back to the raw body when the error payload is not the expected JSON shape.
+            error = response.text
+        logger.error(f"Error generating optimization repair: {response.status_code} - {error}")
+        ph("cli-optimize-error-response", {"response_status_code": response.status_code, "error": error})
+        console.rule()
+        return None
+
+
     def get_new_explanation(  # noqa: D417
         self,
         source_code: str,
 
@@ -423,6 +423,10 @@ def exit_with_message(message: str, *, error_on_exit: bool = False) -> None:
     sys.exit(1 if error_on_exit else 0)
 
 
+def shorten_pytest_error(pytest_error_string: str) -> str:
+    """Condense pytest failure output to the text of its ``E`` and ``>`` lines.
+
+    A line is kept only when it starts with ``E`` or ``>`` followed by at least one space; that
+    prefix is dropped and the remaining text of each kept line is joined with newlines.
+    Returns an empty string when no line matches.
+    """
+    return "\n".join(re.findall(r"^[E>] +(.*)$", pytest_error_string, re.MULTILINE))
+
+
 def extract_unique_errors(pytest_output: str) -> set[str]:
     unique_errors = set()
 
 
@@ -25,6 +25,10 @@
 TOTAL_LOOPING_TIME_LSP = 10.0  # Kept same timing for LSP mode to avoid in increase in performance reporting
 N_CANDIDATES_LP_LSP = 3
 
+# Code repair
+REPAIR_UNMATCHED_PERCENTAGE_LIMIT = 0.4  # if the percentage of unmatched tests is greater than this, we won't fix it (lowering this value makes the repair stricter)
+MAX_REPAIRS_PER_TRACE = 4  # maximum number of repairs we will do for each function
+
 MAX_N_CANDIDATES = 5
 MAX_N_CANDIDATES_LP = 6
 
 
@@ -3,6 +3,7 @@
 from collections import Counter, defaultdict
 from typing import TYPE_CHECKING
 
+import libcst as cst
 from rich.tree import Tree
 
 from codeflash.cli_cmds.console import DEBUG_MODE, lsp_log
@@ -47,6 +48,34 @@ class AIServiceRefinerRequest:
     function_references: str | None = None
 
 
+# The aspect of a test outcome a TestDiff refers to. Subclassing str makes each member
+# compare equal to its plain string value.
+class TestDiffScope(str, Enum):
+    RETURN_VALUE = "return_value"
+    STDOUT = "stdout"
+    # The noqa presumably silences the hardcoded-password lint (S105) triggered by "pass"
+    # in the name/value; this is not a credential.
+    DID_PASS = "did_pass"  # noqa: S105
+
+
+# One entry of AIServiceCodeRepairRequest.test_diffs, which code_repair sends to the
+# /code_repair endpoint. Field meanings below are read from the names only - presumably each
+# instance describes one mismatch between the original code and a candidate on a single test;
+# confirm against the code that builds these.
+@dataclass
+class TestDiff:
+    scope: TestDiffScope  # which aspect this diff is about
+    original_pass: bool
+    candidate_pass: bool
+
+    # All optional; every one defaults to None.
+    original_value: str | None = None
+    candidate_value: str | None = None
+    test_src_code: Optional[str] = None  # presumably the text from InvocationId.get_src_code - confirm
+    candidate_pytest_error: Optional[str] = None
+    original_pytest_error: Optional[str] = None
+
+
+# Immutable input to code_repair: each field is copied under the same key into the payload
+# posted to the /code_repair endpoint.
+@dataclass(frozen=True)
+class AIServiceCodeRepairRequest:
+    optimization_id: str
+    original_source_code: str
+    modified_source_code: str
+    trace_id: str
+    test_diffs: list[TestDiff]
+
+
 # If the method spam is in the class Ham, which is at the top level of the module eggs in the package foo, the fully
 # qualified name of the method is foo.eggs.Ham.spam, its qualified name is Ham.spam, and its name is spam. The full name
 # of the module is foo.eggs.
@@ -243,12 +272,12 @@ def parse_markdown_code(markdown_code: str) -> CodeStringsMarkdown:
 
         """
         matches = markdown_pattern.findall(markdown_code)
-        results = CodeStringsMarkdown()
+        code_string_list = []
         try:
             for file_path, code in matches:
                 path = file_path.strip()
-                results.code_strings.append(CodeString(code=code, file_path=Path(path)))
-            return results  # noqa: TRY300
+                code_string_list.append(CodeString(code=code, file_path=Path(path)))
+            return CodeStringsMarkdown(code_strings=code_string_list)
         except ValidationError:
             # if any file is invalid, return an empty CodeStringsMarkdown for the entire context
             return CodeStringsMarkdown()
@@ -421,11 +450,20 @@ class TestsInFile:
     test_type: TestType
 
 
+# Records which AI-service call produced an OptimizedCandidate. The value is passed to
+# _get_valid_candidates by optimize_python_code (OPTIMIZE), optimize_python_code_line_profiler
+# (OPTIMIZE_LP), optimize_python_code_refinement (REFINE) and code_repair (REPAIR).
+class OptimizedCandidateSource(str, Enum):
+    OPTIMIZE = "OPTIMIZE"
+    OPTIMIZE_LP = "OPTIMIZE_LP"
+    REFINE = "REFINE"
+    REPAIR = "REPAIR"
+
+
 @dataclass(frozen=True)
 class OptimizedCandidate:
     source_code: CodeStringsMarkdown
     explanation: str
     optimization_id: str
+    # Required (no default): which AI-service call produced this candidate.
+    source: OptimizedCandidateSource
+    # Filled from the service entry's "parent_id" when present, otherwise None. Presumably the
+    # optimization_id of the candidate this one was derived from - confirm with the service.
+    parent_id: str | None = None
 
 
 @dataclass(frozen=True)
@@ -572,6 +610,42 @@ def id(self) -> str:
             f"{self.function_getting_tested}:{self.iteration_id}"
         )
 
+    # TestSuiteClass.test_function_name
+    def test_fn_qualified_name(self) -> str:
+        """Return ``"<test_class_name>.<test_function_name>"``, or just the function name.
+
+        The class prefix is included only when ``self.test_class_name`` is truthy; otherwise
+        the result is ``str(self.test_function_name)``.
+        """
+        # Use f-string with inline conditional to reduce string concatenation operations
+        return (
+            f"{self.test_class_name}.{self.test_function_name}"
+            if self.test_class_name
+            else str(self.test_function_name)
+        )
+
+    def find_func_in_class(self, class_node: cst.ClassDef, func_name: str) -> Optional[cst.FunctionDef]:
+        """Return the first function defined directly in ``class_node`` named ``func_name``.
+
+        Only the statements directly in the class body are inspected; nested scopes are not
+        searched. Returns ``None`` when there is no match. Does not read any state from ``self``.
+        """
+        for stmt in class_node.body.body:
+            if isinstance(stmt, cst.FunctionDef) and stmt.name.value == func_name:
+                return stmt
+        return None
+
+    def get_src_code(self, test_path: Path) -> Optional[str]:
+        """Return the source text of this invocation's test function from ``test_path``.
+
+        The file is read as UTF-8 and parsed with libcst. When ``self.test_class_name`` is set,
+        the method ``self.test_function_name`` is looked up inside top-level classes of that
+        name; otherwise a top-level function of that name is looked up. Nested classes and
+        functions are not searched.
+
+        Returns the matched definition's code with surrounding whitespace stripped, or ``None``
+        when the file does not exist or no matching definition is found.
+        """
+        if not test_path.exists():
+            return None
+        test_src = test_path.read_text(encoding="utf-8")
+        module_node = cst.parse_module(test_src)
+
+        if self.test_class_name:
+            for stmt in module_node.body:
+                if isinstance(stmt, cst.ClassDef) and stmt.name.value == self.test_class_name:
+                    func_node = self.find_func_in_class(stmt, self.test_function_name)
+                    if func_node:
+                        return module_node.code_for_node(func_node).strip()
+            # no top-level class with that name contained the method (class or method missing)
+            return None
+
+        # Otherwise, look for a top level function
+        for stmt in module_node.body:
+            if isinstance(stmt, cst.FunctionDef) and stmt.name.value == self.test_function_name:
+                return module_node.code_for_node(stmt).strip()
+        return None
+
     @staticmethod
     def from_str_id(string_id: str, iteration_id: str | None = None) -> InvocationId:
         components = string_id.split(":")
@@ -616,7 +690,10 @@ class TestResults(BaseModel):  # noqa: PLW1641
     # also we don't support deletion of test results elements - caution is advised
     test_results: list[FunctionTestInvocation] = []
     test_result_idx: dict[str, int] = {}
+
     perf_stdout: Optional[str] = None
+    # mapping between test function name and stdout failure message
+    test_failures: Optional[dict[str, str]] = None
 
     def add(self, function_test_invocation: FunctionTestInvocation) -> None:
         unique_id = function_test_invocation.unique_invocation_loop_id