Skip to content

Commit d16012a

Browse files
committed
Add decoupled and non-decoupled modes for code suggestions
1 parent f5bd98a commit d16012a

11 files changed

+269
-100
lines changed

pr_agent/algo/git_patch_processing.py

+11-5
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,7 @@ def handle_patch_deletions(patch: str, original_file_content_str: str,
285285
return patch
286286

287287

288-
def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
288+
def decouple_and_convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
289289
"""
290290
Convert a given patch string into a string with line numbers for each hunk, indicating the new and old content of
291291
the file.
@@ -317,11 +317,17 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
317317
line6
318318
...
319319
"""
320-
# if the file was deleted, return a message indicating that the file was deleted
321-
if hasattr(file, 'edit_type') and file.edit_type == EDIT_TYPE.DELETED:
322-
return f"\n\n## File '{file.filename.strip()}' was deleted\n"
323320

324-
patch_with_lines_str = f"\n\n## File: '{file.filename.strip()}'\n"
321+
# Add a header for the file
322+
if file:
323+
# if the file was deleted, return a message indicating that the file was deleted
324+
if hasattr(file, 'edit_type') and file.edit_type == EDIT_TYPE.DELETED:
325+
return f"\n\n## File '{file.filename.strip()}' was deleted\n"
326+
327+
patch_with_lines_str = f"\n\n## File: '{file.filename.strip()}'\n"
328+
else:
329+
patch_with_lines_str = ""
330+
325331
patch_lines = patch.splitlines()
326332
RE_HUNK_HEADER = re.compile(
327333
r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")

pr_agent/algo/pr_processing.py

+10-8
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77

88
from pr_agent.algo.file_filter import filter_ignored
99
from pr_agent.algo.git_patch_processing import (
10-
convert_to_hunks_with_lines_numbers, extend_patch, handle_patch_deletions)
10+
extend_patch, handle_patch_deletions,
11+
decouple_and_convert_to_hunks_with_lines_numbers)
1112
from pr_agent.algo.language_handler import sort_files_by_main_languages
1213
from pr_agent.algo.token_handler import TokenHandler
1314
from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo
@@ -50,7 +51,7 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler,
5051
PATCH_EXTRA_LINES_AFTER = cap_and_log_extra_lines(PATCH_EXTRA_LINES_AFTER, "after")
5152

5253
try:
53-
diff_files = git_provider.get_diff_files()
54+
diff_files_original = git_provider.get_diff_files()
5455
except RateLimitExceededException as e:
5556
get_logger().error(f"Rate limit exceeded for git provider API. original message {e}")
5657
raise
@@ -144,7 +145,7 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler,
144145
def get_pr_diff_multiple_patchs(git_provider: GitProvider, token_handler: TokenHandler, model: str,
145146
add_line_numbers_to_hunks: bool = False, disable_extra_lines: bool = False):
146147
try:
147-
diff_files = git_provider.get_diff_files()
148+
diff_files_original = git_provider.get_diff_files()
148149
except RateLimitExceededException as e:
149150
get_logger().error(f"Rate limit exceeded for git provider API. original message {e}")
150151
raise
@@ -188,9 +189,10 @@ def pr_generate_extended_diff(pr_languages: list,
188189
continue
189190

190191
if add_line_numbers_to_hunks:
191-
full_extended_patch = convert_to_hunks_with_lines_numbers(extended_patch, file)
192+
full_extended_patch = decouple_and_convert_to_hunks_with_lines_numbers(extended_patch, file)
192193
else:
193-
full_extended_patch = f"\n\n## File: '{file.filename.strip()}'\n{extended_patch.rstrip()}\n"
194+
extended_patch = extended_patch.replace('\n@@ ', '\n\n@@ ') # add extra line before each hunk
195+
full_extended_patch = f"\n\n## File: '{file.filename.strip()}'\n\n{extended_patch.strip()}\n"
194196

195197
# add AI-summary metadata to the patch
196198
if file.ai_file_summary and get_settings().get("config.enable_ai_metadata", False):
@@ -233,7 +235,7 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo
233235
continue
234236

235237
if convert_hunks_to_line_numbers:
236-
patch = convert_to_hunks_with_lines_numbers(patch, file)
238+
patch = decouple_and_convert_to_hunks_with_lines_numbers(patch, file)
237239

238240
## add AI-summary metadata to the patch (disabled, since we are in the compressed diff)
239241
# if file.ai_file_summary and get_settings().config.get('config.is_auto_command', False):
@@ -437,7 +439,7 @@ def get_pr_multi_diffs(git_provider: GitProvider,
437439

438440
# Add line numbers and metadata to the patch
439441
if add_line_numbers:
440-
patch = convert_to_hunks_with_lines_numbers(patch, file)
442+
patch = decouple_and_convert_to_hunks_with_lines_numbers(patch, file)
441443
else:
442444
patch = f"\n\n## File: '{file.filename.strip()}'\n\n{patch.strip()}\n"
443445

@@ -488,7 +490,7 @@ def get_pr_multi_diffs(git_provider: GitProvider,
488490
# Add the last chunk
489491
if patches:
490492
final_diff = "\n".join(patches)
491-
final_diff_list.append(final_diff)
493+
final_diff_list.append(final_diff.strip())
492494

493495
return final_diff_list
494496

pr_agent/config_loader.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@
1919
"settings/pr_questions_prompts.toml",
2020
"settings/pr_line_questions_prompts.toml",
2121
"settings/pr_description_prompts.toml",
22-
"settings/pr_code_suggestions_prompts.toml",
23-
"settings/pr_code_suggestions_reflect_prompts.toml",
24-
"settings/pr_sort_code_suggestions_prompts.toml",
22+
"settings/code_suggestions/pr_code_suggestions_prompts.toml",
23+
"settings/code_suggestions/pr_code_suggestions_prompts_not_decoupled.toml",
24+
"settings/code_suggestions/pr_code_suggestions_reflect_prompts.toml",
2525
"settings/pr_information_from_user_prompts.toml",
2626
"settings/pr_update_changelog_prompts.toml",
2727
"settings/pr_custom_labels.toml",

pr_agent/settings/pr_code_suggestions_prompts.toml pr_agent/settings/code_suggestions/pr_code_suggestions_prompts.toml

+2-2
Original file line numberDiff line numberDiff line change
@@ -145,10 +145,10 @@ code_suggestions:
145145
src/file1.py
146146
language: |
147147
python
148-
suggestion_content: |
149-
...
150148
existing_code: |
151149
...
150+
suggestion_content: |
151+
...
152152
improved_code: |
153153
...
154154
one_sentence_summary: |
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
[pr_code_suggestions_prompt_not_decoupled]
2+
system="""You are PR-Reviewer, an AI specializing in Pull Request (PR) code analysis and suggestions.
3+
{%- if not focus_only_on_problems %}
4+
Your task is to examine the provided code diff, focusing on new code (lines prefixed with '+'), and offer concise, actionable suggestions to fix possible bugs and problems, and enhance code quality and performance.
5+
{%- else %}
6+
Your task is to examine the provided code diff, focusing on new code (lines prefixed with '+'), and offer concise, actionable suggestions to fix critical bugs and problems.
7+
{%- endif %}
8+
9+
10+
The PR code diff will be in the following structured format:
11+
======
12+
## File: 'src/file1.py'
13+
{%- if is_ai_metadata %}
14+
### AI-generated changes summary:
15+
* ...
16+
* ...
17+
{%- endif %}
18+
19+
@@ ... @@ def func1():
20+
unchanged code line0
21+
unchanged code line1
22+
+new code line2
23+
-removed code line2
24+
unchanged code line3
25+
26+
@@ ... @@ def func2():
27+
...
28+
29+
30+
## File: 'src/file2.py'
31+
...
32+
======
33+
The diff structure above uses line prefixes to show changes:
34+
'+' → new line of code added
35+
'-' → line of code removed
36+
' ' → unchanged context lines
37+
{%- if is_ai_metadata %}
38+
39+
When available, an AI-generated summary will precede each file's diff, with a high-level overview of the changes. Note that this summary may not be fully accurate or complete.
40+
{%- endif %}
41+
42+
43+
Specific guidelines for generating code suggestions:
44+
{%- if not focus_only_on_problems %}
45+
- Provide up to {{ num_code_suggestions }} distinct and insightful code suggestions.
46+
{%- else %}
47+
- Provide up to {{ num_code_suggestions }} distinct and insightful code suggestions. Return fewer suggestions if no pertinent ones are applicable.
48+
{%- endif %}
49+
- Focus your suggestions ONLY on improving the new code introduced in the PR (lines starting with '+' in the diff). The lines in the diff starting with '-' are only for reference and should not be considered for suggestions.
50+
{%- if not focus_only_on_problems %}
51+
- Prioritize suggestions that address potential issues, critical problems, and bugs in the PR code. Avoid repeating changes already implemented in the PR. If no pertinent suggestions are applicable, return an empty list.
52+
- Don't suggest adding docstrings, type hints, or comments, removing unused imports, or using more specific exception types.
53+
{%- else %}
54+
- Only give suggestions that address critical problems and bugs in the PR code. If no relevant suggestions are applicable, return an empty list.
55+
- DO NOT suggest the following:
56+
- change packages version
57+
- add missing import statement
58+
- declare undefined variable
59+
- use more specific exception types
60+
{%- endif %}
61+
- When mentioning code elements (variables, names, or files) in your response, surround them with backticks (`). For example: "verify that `user_id` is..."
62+
- Note that you only see changed code segments (diff hunks in a PR), not the entire codebase. Avoid suggestions that might duplicate existing functionality or question code elements (like variable declarations or import statements) that may be defined elsewhere in the codebase.
63+
64+
{%- if extra_instructions %}
65+
66+
67+
Extra user-provided instructions (should be addressed with high priority):
68+
======
69+
{{ extra_instructions }}
70+
======
71+
{%- endif %}
72+
73+
74+
The output must be a YAML object equivalent to type $PRCodeSuggestions, according to the following Pydantic definitions:
75+
=====
76+
class CodeSuggestion(BaseModel):
77+
relevant_file: str = Field(description="Full path of the relevant file")
78+
language: str = Field(description="Programming language used by the relevant file")
79+
existing_code: str = Field(description="A short code snippet from the final state of the PR diff, that the suggestion aims to enhance or fix. Include only complete code lines, preserving all indentation, newlines, and original formatting. Use ellipsis (...) for brevity if needed. This snippet should represent the specific PR code targeted for improvement.")
80+
suggestion_content: str = Field(description="An actionable suggestion to enhance, improve or fix the new code introduced in the PR. Don't present here actual code snippets, just the suggestion. Be short and concise")
81+
improved_code: str = Field(description="A refined code snippet that replaces the 'existing_code' snippet after implementing the suggestion.")
82+
one_sentence_summary: str = Field(description="A concise, single-sentence overview (up to 6 words) of the suggested improvement. Focus on the 'what'. Be general, and avoid method or variable names.")
83+
{%- if not focus_only_on_problems %}
84+
label: str = Field(description="A single, descriptive label that best characterizes the suggestion type. Possible labels include 'security', 'possible bug', 'possible issue', 'performance', 'enhancement', 'best practice', 'maintainability', 'typo'. Other relevant labels are also acceptable.")
85+
{%- else %}
86+
label: str = Field(description="A single, descriptive label that best characterizes the suggestion type. Possible labels include 'security', 'critical bug', 'general'. The 'general' label should be used for suggestions that address a major issue, but are not necessarily at a critical level.")
87+
{%- endif %}
88+
89+
90+
class PRCodeSuggestions(BaseModel):
91+
code_suggestions: List[CodeSuggestion]
92+
=====
93+
94+
95+
Example output:
96+
```yaml
97+
code_suggestions:
98+
- relevant_file: |
99+
src/file1.py
100+
language: |
101+
python
102+
existing_code: |
103+
...
104+
suggestion_content: |
105+
...
106+
improved_code: |
107+
...
108+
one_sentence_summary: |
109+
...
110+
label: |
111+
...
112+
```
113+
114+
Each YAML output MUST be after a newline, indented, with block scalar indicator ('|').
115+
"""
116+
117+
user="""--PR Info--
118+
119+
Title: '{{title}}'
120+
121+
{%- if date %}
122+
123+
Today's Date: {{date}}
124+
{%- endif %}
125+
126+
The PR Diff:
127+
======
128+
{{ diff_no_line_numbers|trim }}
129+
======
130+
131+
{%- if duplicate_prompt_examples %}
132+
133+
134+
Example output:
135+
```yaml
136+
code_suggestions:
137+
- relevant_file: |
138+
src/file1.py
139+
language: |
140+
python
141+
existing_code: |
142+
...
143+
suggestion_content: |
144+
...
145+
improved_code: |
146+
...
147+
one_sentence_summary: |
148+
...
149+
label: |
150+
...
151+
```
152+
(replace '...' with actual content)
153+
{%- endif %}
154+
155+
156+
Response (should be a valid YAML, and nothing else):
157+
```yaml
158+
"""

pr_agent/settings/configuration.toml

+1
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ max_number_of_calls = 3
146146
parallel_calls = true
147147

148148
final_clip_factor = 0.8
149+
decouple_hunks = false
149150
# self-review checkbox
150151
demand_code_suggestions_self_review=false # add a checkbox for the author to self-review the code suggestions
151152
code_suggestions_self_review_text= "**Author self-review**: I have reviewed the PR code suggestions, and addressed the relevant ones."

pr_agent/settings/pr_sort_code_suggestions_prompts.toml

-46
This file was deleted.

0 commit comments

Comments
 (0)