Patchflow builder (#645)

* Add json only llm call * lint * save * update * update * bump to dev2 * retype * update * update steps * update modify code to create as well * updates * Add PRPB * add PRPB * remove readme * changes * fix small bugs * save * update * bump and fix * Allow PRPB to accept path and dict, Add type to CallCode2Prompt outputs * SimplifidedLLMPB change variable * add again * some linting * Patched /home/runner/work/patchwork/patchwork/patchwork/steps/FilterBySimilarity/README.md (#679) Co-authored-by: patched.codes[bot] <298395+patched.codes[bot]@users.noreply.github.com> * fix circular import * changes * fix test case --------- Co-authored-by: Asankhaya Sharma <[email protected]> Co-authored-by: Patched <[email protected]> Co-authored-by: patched.codes[bot] <298395+patched.codes[bot]@users.noreply.github.com>
patched-codes · Aug 23, 2024 · b6fbfbf · b6fbfbf
1 parent 032dc27
commit b6fbfbf
Show file tree

Hide file tree

Showing 31 changed files with 642 additions and 374 deletions.
diff --git a/patchwork/app.py b/patchwork/app.py
@@ -14,8 +14,8 @@
 from typing_extensions import Iterable
 
 from patchwork.common.client.patched import PatchedClient
+from patchwork.common.constants import PROMPT_TEMPLATE_FILE_KEY
 from patchwork.logger import init_cli_logger, logger
-from patchwork.steps.PreparePrompt import PreparePrompt
 
 _DATA_FORMAT_MAPPING = {
     "yaml": yaml.dump,
@@ -157,7 +157,7 @@ def cli(
 
                 patchwork_prompt_path = patchwork_path / _PROMPT_NAME
                 if patchwork_prompt_path.is_file():
-                    inputs[PreparePrompt.PROMPT_TEMPLATE_FILE_KEY] = patchwork_prompt_path
+                    inputs[PROMPT_TEMPLATE_FILE_KEY] = patchwork_prompt_path
                     logger.info(f"Prompt template loaded from {patchwork_prompt_path}")
                 else:
                     logger.debug(

diff --git a/patchwork/common/client/scm.py b/patchwork/common/client/scm.py
@@ -176,6 +176,7 @@ def find_prs(
         state: PullRequestState | None = None,
         original_branch: str | None = None,
         feature_branch: str | None = None,
+        limit: int | None = None,
     ) -> list[PullRequestProtocol]:
         ...
 
@@ -341,7 +342,10 @@ def texts(self) -> PullRequestTexts:
         return dict(
             title=self._pr.title or "",
             body=self._pr.body or "",
-            comments=[dict(user=comment.user.name, body=comment.body) for comment in self._pr.get_comments()],
+            comments=[
+                dict(user=comment.user.name, body=comment.body)
+                for comment in itertools.chain(self._pr.get_comments(), self._pr.get_issue_comments())
+            ],
             # None checks for binary files
             diffs={file.filename: file.patch for file in self._pr.get_files() if file.patch is not None},
         )
@@ -417,6 +421,7 @@ def find_prs(
         state: PullRequestState | None = None,
         original_branch: str | None = None,
         feature_branch: str | None = None,
+        limit: int | None = None,
     ) -> list[GithubPullRequest]:
         repo = self.github.get_repo(slug)
         kwargs_list = dict(state=[None], target_branch=[None], source_branch=[None])
@@ -428,12 +433,12 @@ def find_prs(
         if feature_branch is not None:
             kwargs_list["head"] = [feature_branch]  # type: ignore
 
-        prs = []
+        page_list = []
         keys = kwargs_list.keys()
         for instance in itertools.product(*kwargs_list.values()):
             kwargs = dict(((key, value) for key, value in zip(keys, instance) if value is not None))
             pages = repo.get_pulls(**kwargs)
-            prs.extend(iter(pages))
+            page_list.append(pages)
 
         branch_checker = lambda pr: True
         if original_branch is not None:
@@ -442,7 +447,11 @@ def find_prs(
             branch_checker = lambda pr: branch_checker and pr.head.ref == feature_branch
 
         # filter out PRs that are not the ones we are looking for
-        return [GithubPullRequest(pr) for pr in prs if branch_checker(pr)]
+        # `limit` caps the number of matching PRs returned, so filter lazily before slicing
+        matching_prs = (
+            GithubPullRequest(pr) for pr in itertools.chain.from_iterable(page_list) if branch_checker(pr)
+        )
+        return list(itertools.islice(matching_prs, limit))
 
     def create_pr(
         self,
@@ -541,6 +550,7 @@ def find_prs(
         state: PullRequestState | None = None,
         original_branch: str | None = None,
         feature_branch: str | None = None,
+        limit: int | None = None,
     ) -> list[PullRequestProtocol]:
         project = self.gitlab.projects.get(slug)
         kwargs_list = dict(iterator=[True], state=[None], target_branch=[None], source_branch=[None])
@@ -552,14 +562,18 @@ def find_prs(
         if feature_branch is not None:
             kwargs_list["source_branch"] = [feature_branch]  # type: ignore
 
-        mrs = []
+        page_list = []
         keys = kwargs_list.keys()
         for instance in itertools.product(*kwargs_list.values()):
             kwargs = dict(((key, value) for key, value in zip(keys, instance) if value is not None))
             mrs_instance = project.mergerequests.list(**kwargs)
-            mrs.extend(mrs_instance)
+            page_list.append(mrs_instance)
 
-        return [GitlabMergeRequest(mr) for mr in mrs]
+        # Flatten the per-query result iterators lazily; `islice` stops after
+        # `limit` merge requests (or never, when `limit` is None).
+        merge_requests = itertools.islice(itertools.chain.from_iterable(page_list), limit)
+
+        return [GitlabMergeRequest(mr) for mr in merge_requests]
 
     def create_pr(
         self,

diff --git a/patchwork/common/constants.py b/patchwork/common/constants.py
@@ -0,0 +1,3 @@
+TOKEN_URL = "https://app.patched.codes/signin"
+DEFAULT_PATCH_URL = "https://patchwork.patched.codes/v1"
+PROMPT_TEMPLATE_FILE_KEY = "prompt_template_file"
diff --git a/patchwork/logger.py b/patchwork/logger.py
@@ -95,7 +95,7 @@ def panel(self, title: str):
         if self.__progress_bar is not None:
             renderables.append(self.__progress_bar)
 
-        self.__live = Live(Group(*renderables), console=console, vertical_overflow="crop")
+        self.__live = Live(Group(*renderables), console=console, vertical_overflow="visible")
         try:
             self.__live.start()
             yield

diff --git a/patchwork/steps/CallCode2Prompt/typed.py b/patchwork/steps/CallCode2Prompt/typed.py
@@ -13,5 +13,12 @@ class CallCode2PromptInputs(__CallCode2PromptRequiredInputs, total=False):
     markdown_file_name: Annotated[str, StepTypeConfig(is_config=True)]
 
 
+class FileToPatch(TypedDict):
+    uri: str
+    startLine: int
+    endLine: int
+    fullContent: str
+
+
 class CallCode2PromptOutputs(TypedDict):
-    files_to_patch: Iterable[dict]
+    files_to_patch: Iterable[FileToPatch]
diff --git a/patchwork/steps/CallLLM/CallLLM.py b/patchwork/steps/CallLLM/CallLLM.py
@@ -7,18 +7,19 @@
 from pprint import pformat
 from textwrap import indent
 
+from rich.markup import escape
+
 from patchwork.common.client.llm.aio import AioLlmClient
 from patchwork.common.client.llm.anthropic import AnthropicLlmClient
 from patchwork.common.client.llm.google import GoogleLlmClient
 from patchwork.common.client.llm.openai import OpenAiLlmClient
+from patchwork.common.constants import DEFAULT_PATCH_URL, TOKEN_URL
 from patchwork.logger import logger
 from patchwork.step import Step, StepStatus
-
-TOKEN_URL = "https://app.patched.codes/signin"
-_DEFAULT_PATCH_URL = "https://patchwork.patched.codes/v1"
+from patchwork.steps.CallLLM.typed import CallLLMInputs, CallLLMOutputs
 
 
-class CallLLM(Step):
+class CallLLM(Step, input_class=CallLLMInputs, output_class=CallLLMOutputs):
     def __init__(self, inputs: dict):
         super().__init__(inputs)
         # Set 'openai_key' from inputs or environment if not already set
@@ -49,7 +50,7 @@ def __init__(self, inputs: dict):
 
         patched_key = inputs.get("patched_api_key")
         if patched_key is not None:
-            client = OpenAiLlmClient(patched_key, _DEFAULT_PATCH_URL)
+            client = OpenAiLlmClient(patched_key, DEFAULT_PATCH_URL)
             clients.append(client)
 
         openai_key = inputs.get("openai_api_key") or os.environ.get("OPENAI_API_KEY")
@@ -76,7 +77,7 @@ def __init__(self, inputs: dict):
                 "Please copy the access token that is generated, "
                 "and add `--patched_api_key=<token>` to the command line.\n"
                 "\n"
-                "If you are using a OpenAI API Key, please set `--openai_api_key=<token>`.\n"
+                "If you are using an OpenAI API Key, please set `--openai_api_key=<token>`.\n"
             )
 
         self.client = AioLlmClient(*clients)
@@ -126,7 +127,7 @@ def __call(self, prompts: list[dict]) -> list[str]:
         parsed_model_args = self.__parse_model_args()
 
         for prompt in prompts:
-            logger.trace(f"Message sent: \n{indent(pformat(prompt), '  ')}")
+            logger.trace(f"Message sent: \n{escape(indent(pformat(prompt), '  '))}")
             try:
                 completion = self.client.chat_completion(model=self.model, messages=prompt, **parsed_model_args)
             except Exception as e:
@@ -147,7 +148,7 @@ def __call(self, prompts: list[dict]) -> list[str]:
                     content = ""
             else:
                 content = completion.choices[0].message.content
-                logger.trace(f"Response received: \n{indent(content, '  ')}")
+                logger.trace(f"Response received: \n{escape(indent(content, '  '))}")
 
             contents.append(content)
 

diff --git a/patchwork/steps/CallLLM/typed.py b/patchwork/steps/CallLLM/typed.py
@@ -1,7 +1,7 @@
 from typing_extensions import Annotated, Dict, List, TypedDict
 
+from patchwork.common.constants import TOKEN_URL
 from patchwork.common.utils.step_typing import StepTypeConfig
-from patchwork.steps.CallLLM.CallLLM import TOKEN_URL
 
 
 class CallLLMInputs(TypedDict, total=False):

diff --git a/patchwork/steps/Combine/Combine.py b/patchwork/steps/Combine/Combine.py
@@ -18,7 +18,7 @@ def run(self):
         base_list = isinstance(self.base, list)
         update_list = isinstance(self.update, list)
         if not base_list and not update_list:
-            return dict(result_json={**self.base, **self.update})
+            return {**self.base, **self.update}
 
         if base_list and update_list:
             final_output = []

diff --git a/patchwork/steps/FilterBySimilarity/README.md b/patchwork/steps/FilterBySimilarity/README.md
@@ -0,0 +1,21 @@
+## Input and Output Data Handling for FilterBySimilarity Step
+
+This document gives an overview of the three Python files that make up the `FilterBySimilarity` step, one of the steps in the `patchwork` package.
+
+### Inputs
+- `FilterBySimilarityInputs` class defines the expected input structure for the step, including:
+  - `list`: A list of dictionaries.
+  - `keywords`: A string annotated as configuration data.
+  - `keys`: A string annotated as configuration data.
+  - `top_k`: An integer annotated as configuration data.
+
+### Outputs
+- `FilterBySimilarityOutputs` class defines the output structure for the step, including:
+  - `result_list`: A list of dictionaries containing filtered items based on similarity.
+
+### Code Functionality
+- The code in the `FilterBySimilarity.py` file implements the logic for the `FilterBySimilarity` step.
+- It utilizes TF-IDF vectorization and cosine similarity to calculate the similarity between provided keywords and text items in the input list of dictionaries.
+- The step function processes the input data, calculates similarity scores, and returns a filtered list of items based on similarity.
+- A logger and several helper functions are used for processing input data and performing necessary calculations.
+- The file `__init__.py` is empty; it only marks the directory as a package and contains no code logic.
diff --git a/patchwork/steps/JoinListPB/JoinListPB.py b/patchwork/steps/JoinListPB/JoinListPB.py
@@ -15,7 +15,7 @@ def run(self):
         join_list = JoinList(
             {
                 **self.inputs,
-                "list": [item for item in self.list if item.get(self.key) is not None],
+                "list": [item.get(self.key) for item in self.list if item.get(self.key) is not None],
             }
         )
         join_list_output = join_list.run()

diff --git a/patchwork/steps/ModifyCode/ModifyCode.py b/patchwork/steps/ModifyCode/ModifyCode.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+from pathlib import Path
+
 from patchwork.step import Step, StepStatus
 
 
@@ -30,15 +32,23 @@ def handle_indent(src: list[str], target: list[str], start: int, end: int) -> li
     return [indent + line for line in target]
 
 
-def replace_code_in_file(file_path, start_line, end_line, new_code):
-    """Replaces specified lines in a file with new code."""
-    with open(file_path, "r") as file:
-        text = file.read()
+def replace_code_in_file(
+    file_path: str,
+    start_line: int | None,
+    end_line: int | None,
+    new_code: str,
+) -> None:
+    """Replaces specified lines in a file with new code, or creates the file from new_code if it is missing."""
+    path = Path(file_path)
+    if path.exists():
+        text = path.read_text()
 
-    lines = text.splitlines(keepends=True)
+        lines = text.splitlines(keepends=True)
 
-    # Insert the new code at the start line after converting it into a list of lines
-    lines[start_line:end_line] = handle_indent(lines, new_code.splitlines(keepends=True), start_line, end_line)
+        # Insert the new code at the start line after converting it into a list of lines
+        lines[start_line:end_line] = handle_indent(lines, new_code.splitlines(keepends=True), start_line, end_line)
+    else:
+        lines = new_code.splitlines(keepends=True)
 
     # Save the modified contents back to the file
     save_file_contents(file_path, "".join(lines))
@@ -67,11 +77,11 @@ def run(self) -> dict:
             return dict(modified_code_files=[])
 
         for code_snippet, extracted_response in sorted_list:
-            uri = code_snippet["uri"]
-            start_line = code_snippet["startLine"]
-            end_line = code_snippet["endLine"]
+            uri = code_snippet.get("uri")
+            start_line = code_snippet.get("startLine")
+            end_line = code_snippet.get("endLine")
             new_code = extracted_response.get("patch")
-            if new_code is None or new_code == "":
+            if new_code is None:
                 continue
 
             replace_code_in_file(uri, start_line, end_line, new_code)

diff --git a/patchwork/steps/ModifyCodePB/ModifyCodePB.py b/patchwork/steps/ModifyCodePB/ModifyCodePB.py
@@ -8,24 +8,27 @@
 class ModifyCodePB(Step, input_class=ModifyCodePBInputs, output_class=ModifyCodePBOutputs):
     def __init__(self, inputs: dict):
         super().__init__(inputs)
-        self.files_with_patch = inputs["files_with_patch"]
+        self.file_path = inputs["file_path"]
+        self.start_line = inputs["start_line"]
+        self.end_line = inputs["end_line"]
+        self.patch = inputs["new_code"]
 
     def run(self) -> dict:
         modify_code = ModifyCode(
             {
                 "files_to_patch": [
                     dict(
-                        uri=self.files_with_patch.get("file_path"),
-                        startLine=self.files_with_patch.get("start_line"),
-                        endLine=self.files_with_patch.get("end_line"),
+                        uri=self.file_path,
+                        startLine=self.start_line,
+                        endLine=self.end_line,
                     )
                 ],
                 "extracted_responses": [
                     dict(
-                        patch=self.files_with_patch.get("patch"),
+                        patch=self.patch,
                     )
                 ],
             }
         )
         modified_code_files = modify_code.run()
-        return dict(**modified_code_files[0])
+        return next(iter(modified_code_files.get("modified_code_files", [])), {})  # {} when nothing was modified
diff --git a/patchwork/steps/ModifyCodePB/typed.py b/patchwork/steps/ModifyCodePB/typed.py
@@ -1,15 +1,11 @@
 from typing_extensions import TypedDict
 
 
-class FileWithPatch(TypedDict):
+class ModifyCodePBInputs(TypedDict):
     file_path: str
     start_line: int
     end_line: int
-    patch: str
-
-
-class ModifyCodePBInputs(TypedDict):
-    files_with_patch: FileWithPatch
+    new_code: str
 
 
 class ModifyCodePBOutputs(TypedDict):

diff --git a/patchwork/steps/PR/typed.py b/patchwork/steps/PR/typed.py
@@ -5,6 +5,8 @@
 
 class ModifiedCodeFile(TypedDict):
     path: str
+    commit_message: str
+    patch_message: str
 
 
 class __PRInputsRequired(TypedDict):

diff --git a/patchwork/steps/PRPB/PRPB.py b/patchwork/steps/PRPB/PRPB.py
@@ -0,0 +1,47 @@
+from patchwork.step import Step
+from patchwork.steps import PR
+from patchwork.steps.PRPB.typed import PRPBInputs, PRPBOutputs
+
+
+class PRPB(Step, input_class=PRPBInputs, output_class=PRPBOutputs):
+    """Wrapper around the `PR` step whose `modified_files` entries use caller-chosen key names.
+
+    `path_key` (required) names the entry key holding the file path. `comment_title_key` and
+    `comment_message_key` (optional) name the entry keys mapped to `commit_message` and
+    `patch_message`. `modified_files` may be a list of dicts, a single dict, or a single path string.
+    """
+
+    def __init__(self, inputs):
+        super().__init__(inputs)
+        key_map = dict(path=inputs["path_key"])
+        # Look up each optional key name under the same input it is read from, so a
+        # configured name is never skipped and a missing one never raises KeyError.
+        # The short `title_key` / `message_key` spellings are accepted as fallbacks.
+        for field, key_names in (
+            ("commit_message", ("comment_title_key", "title_key")),
+            ("patch_message", ("comment_message_key", "message_key")),
+        ):
+            mapped_key = next((inputs[name] for name in key_names if inputs.get(name) is not None), None)
+            if mapped_key is not None:
+                key_map[field] = mapped_key
+
+        self.modified_files = []
+        input_modified_files = inputs.get("modified_files")
+        if isinstance(input_modified_files, list):
+            for modified_file in input_modified_files:
+                converted_modified_file = {key: modified_file.get(mapped_key) for key, mapped_key in key_map.items()}
+                self.modified_files.append(converted_modified_file)
+        elif isinstance(input_modified_files, dict):
+            converted_modified_file = {key: input_modified_files.get(mapped_key) for key, mapped_key in key_map.items()}
+            self.modified_files.append(converted_modified_file)
+        elif isinstance(input_modified_files, str):
+            converted_modified_file = {"path": input_modified_files}
+            self.modified_files.append(converted_modified_file)
+        self.inputs = inputs
+
+    def run(self):
+        """Runs `PR` with the remapped files passed as `modified_code_files` and returns its outputs."""
+        pr = PR({**self.inputs, "modified_code_files": self.modified_files})
+        pr_outputs = pr.run()
+
+        return pr_outputs
diff --git a/patchwork/steps/PRPB/__init__.py b/patchwork/steps/PRPB/__init__.py