Skip to content

Commit a593f66

Browse files
kozistr and alvarobartt authored
Fix the infinite loop when max_input_length is bigger than max-batch-tokens (#725)
Co-authored-by: Alvaro Bartolome <[email protected]>
1 parent 82d8560 commit a593f66

File tree

1 file changed

+23
-1
lines changed

1 file changed

+23
-1
lines changed

router/src/lib.rs

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,13 +184,35 @@ pub async fn run(
184184
break;
185185
}
186186
}
187-
let max_input_length = match st_config {
187+
188+
let base_input_length = match st_config {
188189
Some(config) => config.max_seq_length,
189190
None => {
190191
tracing::warn!("Could not find a Sentence Transformers config");
191192
config.max_position_embeddings - position_offset
192193
}
193194
};
195+
196+
// Raise an error when max_input_length is bigger than max_batch tokens to prevent an infinite loop in the queue
197+
let max_input_length = if base_input_length > max_batch_tokens {
198+
if !auto_truncate {
199+
anyhow::bail!(
200+
"`--max-batch-tokens` cannot be lower than the model `max_input_length` ({} < {}) when `--auto-truncate` is disabled, add the `--auto-truncate` flag to truncate the input sequences to match the `--max-batch-tokens`.",
201+
base_input_length,
202+
max_batch_tokens
203+
);
204+
}
205+
tracing::warn!(
206+
"The input sequences will be truncated to {} tokens even if the model `max_input_length` is greater than the provided `--max-batch-tokens` ({} > {}), as `--auto-truncate` is enabled.",
207+
max_batch_tokens,
208+
base_input_length,
209+
max_batch_tokens
210+
);
211+
max_batch_tokens
212+
} else {
213+
base_input_length
214+
};
215+
194216
tracing::info!("Maximum number of tokens per request: {max_input_length}");
195217

196218
let tokenization_workers = tokenization_workers.unwrap_or_else(num_cpus::get);

0 commit comments

Comments (0)