Skip to content

Commit 47d8ede

Browse files
committed
Add support for configuring max_completion_tokens for OpenAI
Related to db94227
1 parent 692d61b commit 47d8ede

File tree

5 files changed

+13
-2
lines changed

5 files changed

+13
-2
lines changed

docs/sample-provider-configs/openai-o1.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@ text_generation:
66
prompt: null
77
temperature: 1.0
88
# o1 models do not support max_response_tokens.
9-
# They use `max_completion_tokens` as an alternative,
10-
# but we don't support it yet (see https://github.com/64bit/async-openai/issues/272).
9+
# They use `max_completion_tokens` as an alternative.
1110
max_response_tokens: null
11+
max_completion_tokens: 16384
1212
max_context_tokens: 128000
1313
speech_to_text:
1414
model_id: whisper-1

etc/app/config.yml.dist

+2
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@ agents:
8080
# prompt: "You are a brief, but helpful bot called {{ baibot_name }} powered by the {{ baibot_model_id }} model. The date/time of this conversation's start is: {{ baibot_conversation_start_time_utc }}."
8181
# temperature: 1.0
8282
# max_response_tokens: 16384
83+
# # Reasoning models need to use `max_completion_tokens` instead of `max_response_tokens`.
84+
# max_completion_tokens: ~
8385
# max_context_tokens: 128000
8486
# speech_to_text:
8587
# model_id: whisper-1

src/agent/provider/openai/config.rs

+4
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@ pub struct TextGenerationConfig {
5858
#[serde(default)]
5959
pub max_response_tokens: Option<u32>,
6060

61+
#[serde(default)]
62+
pub max_completion_tokens: Option<u32>,
63+
6164
#[serde(default)]
6265
pub max_context_tokens: u32,
6366
}
@@ -69,6 +72,7 @@ impl Default for TextGenerationConfig {
6972
prompt: Some(default_prompt().to_owned()),
7073
temperature: super::super::default_temperature(),
7174
max_response_tokens: Some(16_384),
75+
max_completion_tokens: None,
7276
max_context_tokens: 128_000,
7377
}
7478
}

src/agent/provider/openai/controller.rs

+4
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,10 @@ impl ControllerTrait for Controller {
144144
request_builder.max_tokens(max_response_tokens);
145145
}
146146

147+
if let Some(max_completion_tokens) = text_generation_config.max_completion_tokens {
148+
request_builder.max_completion_tokens(max_completion_tokens);
149+
}
150+
147151
let request = request_builder.build()?;
148152

149153
if let Ok(request_as_json) = serde_json::to_string(&request) {

src/agent/provider/openai_compat/config.rs

+1
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ impl TryInto<OpenAITextGenerationConfig> for TextGenerationConfig {
9393
prompt: self.prompt,
9494
temperature: self.temperature,
9595
max_response_tokens: self.max_response_tokens,
96+
max_completion_tokens: None,
9697
max_context_tokens: self.max_context_tokens,
9798
})
9899
}

0 commit comments

Comments (0)