From 8b60bc57e6c980dfc5eed8932a3cf64244b2079b Mon Sep 17 00:00:00 2001 From: lebaudantoine Date: Mon, 16 Dec 2024 00:59:21 +0100 Subject: [PATCH] wip use our internal llm + switch from json to markdown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simplify the LLM's job. Do not request JSON output with a single key. Instead, make sure the LLM doesn't output any extra information. By simplifying the LLM's job, we're making sure its output can be parsed. I did a quick test with the Translate prompt. Adding instructions to output only the translated text seems to be enough, after a bunch of tests. I did some light prompt engineering, using ChatGPT and Claude to generate a proper system prompt … it works reasonably well, BUT there is certainly room for improvement. I haven't yet searched for open-source prompts we could reuse from a prompt library. Producing a perfect translation seems to be a difficult job for an 8B model. Please note I haven't updated the other prompts yet; let's discuss it before I do. I ran my experiment with our internal LLM, which is optimized for throughput rather than latency (there is a trade-off). I'll try to fine-tune a few of its parameters to see if I can reduce its latency. For 880 tokens (based on ChatGPT's online token counter), it takes roughly 17s, vs ~40s for Albert CNRS 70B. For 180 tokens it takes roughly 3s. Without a proper UX (e.g. a nicer loading animation, streaming tokens) it feels like a decade. However, asking ChatGPT to do the same job takes about the same amount of time, from submitting the request to the last token being generated. 
--- src/backend/core/services/ai_services.py | 47 ++++++++++--------- src/helm/env.d/dev/values.impress.yaml.gotmpl | 3 ++ 2 files changed, 27 insertions(+), 23 deletions(-) diff --git a/src/backend/core/services/ai_services.py b/src/backend/core/services/ai_services.py index 102e86892..1a38c5535 100644 --- a/src/backend/core/services/ai_services.py +++ b/src/backend/core/services/ai_services.py @@ -35,10 +35,29 @@ ), } + AI_TRANSLATE = ( - "Translate the markdown text to {language:s}, preserving markdown formatting. " - 'Return JSON: {{"answer": "your translated markdown text in {language:s}"}}. ' - "Do not provide any other information." + """ + You are a professional translator for `{language:s}`. + + ### Guidelines: + 1. **Preserve exactly as-is:** + - All formatting, markdown, symbols, tags + - Names, numbers, URLs, citations + - Code blocks and technical terms + + 2. **Translation Rules:** + - Use natural expressions in the target language + - Match the tone of the source text (default: professional) + - Maintain original meaning precisely + - Adapt idioms to suit the target culture + - Ensure grammatical correctness stylistic coherence + + 3. **Do Not:** + - Add, remove, or explain any content + + Output only the translated text, keeping all original formatting intact. + """ ) @@ -59,32 +78,14 @@ def call_ai_api(self, system_content, text): """Helper method to call the OpenAI API and process the response.""" response = self.client.chat.completions.create( model=settings.AI_MODEL, - response_format={"type": "json_object"}, messages=[ {"role": "system", "content": system_content}, - {"role": "user", "content": json.dumps({"markdown_input": text})}, + {"role": "user", "content": text}, ], ) content = response.choices[0].message.content - - try: - sanitized_content = re.sub(r'\s*"answer"\s*:\s*', '"answer": ', content) - sanitized_content = re.sub(r"\s*\}", "}", sanitized_content) - sanitized_content = re.sub(r"(?