Skip to content

Commit 605a4b9

Browse files
authored
Merge pull request #1534 from qodo-ai/tr/help_r
feat: improve help tool with markdown header formatting and error handling
2 parents a255087 + b989f41 commit 605a4b9

File tree

2 files changed

+48
-8
lines changed

2 files changed

+48
-8
lines changed

pr_agent/settings/pr_help_prompts.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ The output must be a YAML object equivalent to type $DocHelper, according to the
1313
=====
1414
class relevant_section(BaseModel):
1515
file_name: str = Field(description="The name of the relevant file")
16-
relevant_section_header_string: str = Field(description="From the relevant file, exact text of the relevant section heading. If no markdown heading is relevant, return empty string")
16+
relevant_section_header_string: str = Field(description="The exact text of the relevant markdown section heading from the relevant file (starting with '#', '##', etc.). Return empty string if the entire file is the relevant section, or if the relevant section has no heading")
1717
1818
class DocHelper(BaseModel):
1919
user_question: str = Field(description="The user's question")

pr_agent/tools/pr_help_message.py

Lines changed: 47 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import copy
2+
import re
23
from functools import partial
34
from pathlib import Path
45

@@ -9,10 +10,9 @@
910
from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
1011
from pr_agent.algo.pr_processing import retry_with_fallback_models
1112
from pr_agent.algo.token_handler import TokenHandler
12-
from pr_agent.algo.utils import ModelType, clip_tokens, load_yaml
13+
from pr_agent.algo.utils import ModelType, clip_tokens, load_yaml, get_max_tokens
1314
from pr_agent.config_loader import get_settings
14-
from pr_agent.git_providers import (BitbucketServerProvider, GithubProvider,
15-
get_git_provider_with_context)
15+
from pr_agent.git_providers import BitbucketServerProvider, GithubProvider, get_git_provider_with_context
1616
from pr_agent.log import get_logger
1717

1818

@@ -30,10 +30,11 @@ def extract_header(snippet):
3030
return res
3131

3232
class PRHelpMessage:
33-
def __init__(self, pr_url: str, args=None, ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):
33+
def __init__(self, pr_url: str, args=None, ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler, return_as_string=False):
3434
self.git_provider = get_git_provider_with_context(pr_url)
3535
self.ai_handler = ai_handler()
3636
self.question_str = self.parse_args(args)
37+
self.return_as_string = return_as_string
3738
self.num_retrieved_snippets = get_settings().get('pr_help.num_retrieved_snippets', 5)
3839
if self.question_str:
3940
self.vars = {
@@ -65,6 +66,34 @@ def parse_args(self, args):
6566
question_str = ""
6667
return question_str
6768

69+
def format_markdown_header(self, header: str) -> str:
    """Convert a markdown section heading into a URL anchor slug.

    Strips leading/trailing '#', spaces, gem emoji and newlines, removes
    punctuation that GitHub drops from anchors ('`(),.?!), turns spaces
    into hyphens, and lowercases the result.

    Returns an empty string (and logs) if formatting fails for any reason,
    so a bad header never breaks answer publishing.
    """
    try:
        # Strip heading markers and decoration from both ends only.
        cleaned = header.strip('# 💎\n')

        # All edits are single-character removals/replacements, so a
        # translation table does the whole job in one C-level pass —
        # no regex compilation needed on every call.
        table = str.maketrans({
            "'": '',
            "`": '',
            '(': '',
            ')': '',
            ',': '',
            '.': '',
            '?': '',
            '!': '',
            ' ': '-',
        })

        return cleaned.translate(table).lower()
    except Exception:
        # Best-effort: a malformed header should degrade to a bare file
        # link, not crash the help flow.
        get_logger().exception(f"Error while formatting markdown header", artifacts={'header': header})
        return ""
95+
96+
6897
async def run(self):
6998
try:
7099
if self.question_str:
@@ -106,16 +135,27 @@ async def run(self):
106135
get_logger().debug(f"Token count of full documentation website: {token_count}")
107136

108137
model = get_settings().config.model
109-
max_tokens_full = MAX_TOKENS[model] # note - here we take the actual max tokens, without any reductions. we do aim to get the full documentation website in the prompt
138+
if model in MAX_TOKENS:
139+
max_tokens_full = MAX_TOKENS[model] # note - here we take the actual max tokens, without any reductions. we do aim to get the full documentation website in the prompt
140+
else:
141+
max_tokens_full = get_max_tokens(model)
110142
delta_output = 2000
111143
if token_count > max_tokens_full - delta_output:
112144
get_logger().info(f"Token count {token_count} exceeds the limit {max_tokens_full - delta_output}. Skipping the PR Help message.")
113145
docs_prompt = clip_tokens(docs_prompt, max_tokens_full - delta_output)
114146
self.vars['snippets'] = docs_prompt.strip()
115147

116148
# run the AI model
117-
response = await retry_with_fallback_models(self._prepare_prediction, model_type=ModelType.WEAK)
149+
response = await retry_with_fallback_models(self._prepare_prediction, model_type=ModelType.REGULAR)
118150
response_yaml = load_yaml(response)
151+
if isinstance(response_yaml, str):
152+
get_logger().warning(f"failing to parse response: {response_yaml}, publishing the response as is")
153+
if get_settings().config.publish_output:
154+
answer_str = f"### Question: \n{self.question_str}\n\n"
155+
answer_str += f"### Answer:\n\n"
156+
answer_str += response_yaml
157+
self.git_provider.publish_comment(answer_str)
158+
return ""
119159
response_str = response_yaml.get('response')
120160
relevant_sections = response_yaml.get('relevant_sections')
121161

@@ -138,7 +178,7 @@ async def run(self):
138178
for section in relevant_sections:
139179
file = section.get('file_name').strip().removesuffix('.md')
140180
if str(section['relevant_section_header_string']).strip():
141-
markdown_header = section['relevant_section_header_string'].strip().strip('#').strip().lower().replace(' ', '-').replace("'", '').replace('(', '').replace(')', '').replace(',', '').replace('.', '').replace('?', '').replace('!', '')
181+
markdown_header = self.format_markdown_header(section['relevant_section_header_string'])
142182
answer_str += f"> - {base_path}{file}#{markdown_header}\n"
143183
else:
144184
answer_str += f"> - {base_path}{file}\n"

0 commit comments

Comments
 (0)