From 881f8b3139794aca0a6699c3d34ebf8fba01b789 Mon Sep 17 00:00:00 2001
From: Sahil
Date: Wed, 23 Apr 2025 17:26:24 +0530
Subject: [PATCH 1/3] perf(llm): Optimize pruneLines functions in countTokens

---
 core/llm/countTokens.ts | 45 ++++++++++++++++++++++++++++++++---------
 1 file changed, 36 insertions(+), 9 deletions(-)

diff --git a/core/llm/countTokens.ts b/core/llm/countTokens.ts
index 2fc01f9b8b..e6eb73aae8 100644
--- a/core/llm/countTokens.ts
+++ b/core/llm/countTokens.ts
@@ -157,7 +157,7 @@ function countChatMessageTokens(
 ): number {
   // Doing simpler, safer version of what is here:
   // https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
-  // every message follows <|im_start|>{role/name}\n{content}<|end|>\n
+  // every message follows `{role/name}\n{content}<|end|>\n`
   const TOKENS_PER_MESSAGE: number = 4;
   return countTokens(chatMessage.content, modelName) + TOKENS_PER_MESSAGE;
 }
@@ -167,13 +167,27 @@ function pruneLinesFromTop(
   maxTokens: number,
   modelName: string,
 ): string {
-  let totalTokens = countTokens(prompt, modelName);
   const lines = prompt.split("\n");
-  while (totalTokens > maxTokens && lines.length > 0) {
-    totalTokens -= countTokens(lines.shift()!, modelName);
+  const lineTokens = lines.map((line) => countTokens(line, modelName));
+  let totalTokens = lineTokens.reduce((sum, tokens) => sum + tokens, 0);
+  let start = 0;
+  let currentLines = lines.length;
+
+  // Calculate initial token count including newlines
+  totalTokens += Math.max(0, currentLines - 1); // Add tokens for joining newlines
+
+  // Using indexes instead of array modifications.
+  // Remove lines from the top until the token count is within the limit.
+  while (totalTokens > maxTokens && start < currentLines) {
+    totalTokens -= lineTokens[start];
+    // Decrement token count for the removed line and its preceding/joining newline (if not the last line)
+    if (currentLines - start > 1) {
+      totalTokens--;
+    }
+    start++;
   }
-  return lines.join("\n");
+  return lines.slice(start).join("\n");
 }
 
 function pruneLinesFromBottom(
@@ -181,13 +195,26 @@ function pruneLinesFromBottom(
   maxTokens: number,
   modelName: string,
 ): string {
-  let totalTokens = countTokens(prompt, modelName);
   const lines = prompt.split("\n");
-  while (totalTokens > maxTokens && lines.length > 0) {
-    totalTokens -= countTokens(lines.pop()!, modelName);
+  const lineTokens = lines.map((line) => countTokens(line, modelName));
+  let totalTokens = lineTokens.reduce((sum, tokens) => sum + tokens, 0);
+  let end = lines.length;
+
+  // Calculate initial token count including newlines
+  totalTokens += Math.max(0, end - 1); // Add tokens for joining newlines
+
+  // Reverse traversal to avoid array modification
+  // Remove lines from the bottom until the token count is within the limit.
+  while (totalTokens > maxTokens && end > 0) {
+    end--;
+    totalTokens -= lineTokens[end];
+    // Decrement token count for the removed line and its following/joining newline (if not the first line)
+    if (end > 0) {
+      totalTokens--;
+    }
   }
-  return lines.join("\n");
+  return lines.slice(0, end).join("\n");
 }
 
 function pruneStringFromBottom(

From 28cdd1cee25973aa9a8a5cfdcbcc436e0b7f6240 Mon Sep 17 00:00:00 2001
From: Sahil
Date: Wed, 23 Apr 2025 17:34:22 +0530
Subject: [PATCH 2/3] perf(llm): Optimize pruneLines functions in countTokens

---
 core/llm/countTokens.ts | 48 ++++++++++------------------------------
 1 file changed, 11 insertions(+), 37 deletions(-)

diff --git a/core/llm/countTokens.ts b/core/llm/countTokens.ts
index e6eb73aae8..5bb4ff1987 100644
--- a/core/llm/countTokens.ts
+++ b/core/llm/countTokens.ts
@@ -157,7 +157,7 @@ function countChatMessageTokens(
 ): number {
   // Doing simpler, safer version of what is here:
   // https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
-  // every message follows `{role/name}\n{content}<|end|>\n`
+  // every message follows <|im_start|>{role/name}\n{content}<|end|>\n
   const TOKENS_PER_MESSAGE: number = 4;
   return countTokens(chatMessage.content, modelName) + TOKENS_PER_MESSAGE;
 }
@@ -167,27 +167,13 @@ function pruneLinesFromTop(
   maxTokens: number,
   modelName: string,
 ): string {
+  let totalTokens = countTokens(prompt, modelName);
   const lines = prompt.split("\n");
-  const lineTokens = lines.map((line) => countTokens(line, modelName));
-  let totalTokens = lineTokens.reduce((sum, tokens) => sum + tokens, 0);
-  let start = 0;
-  let currentLines = lines.length;
-
-  // Calculate initial token count including newlines
-  totalTokens += Math.max(0, currentLines - 1); // Add tokens for joining newlines
-
-  // Using indexes instead of array modifications.
-  // Remove lines from the top until the token count is within the limit.
-  while (totalTokens > maxTokens && start < currentLines) {
-    totalTokens -= lineTokens[start];
-    // Decrement token count for the removed line and its preceding/joining newline (if not the last line)
-    if (currentLines - start > 1) {
-      totalTokens--;
-    }
-    start++;
+  while (totalTokens > maxTokens && lines.length > 0) {
+    totalTokens -= countTokens(lines.shift()!, modelName);
   }
-  return lines.slice(start).join("\n");
+  return lines.join("\n");
 }
 
 function pruneLinesFromBottom(
@@ -195,26 +181,13 @@ function pruneLinesFromBottom(
   maxTokens: number,
   modelName: string,
 ): string {
+  let totalTokens = countTokens(prompt, modelName);
   const lines = prompt.split("\n");
-  const lineTokens = lines.map((line) => countTokens(line, modelName));
-  let totalTokens = lineTokens.reduce((sum, tokens) => sum + tokens, 0);
-  let end = lines.length;
-
-  // Calculate initial token count including newlines
-  totalTokens += Math.max(0, end - 1); // Add tokens for joining newlines
-
-  // Reverse traversal to avoid array modification
-  // Remove lines from the bottom until the token count is within the limit.
-  while (totalTokens > maxTokens && end > 0) {
-    end--;
-    totalTokens -= lineTokens[end];
-    // Decrement token count for the removed line and its following/joining newline (if not the first line)
-    if (end > 0) {
-      totalTokens--;
-    }
+  while (totalTokens > maxTokens && lines.length > 0) {
+    totalTokens -= countTokens(lines.pop()!, modelName);
   }
-  return lines.slice(0, end).join("\n");
+  return lines.join("\n");
 }
 
 function pruneStringFromBottom(
@@ -633,5 +606,6 @@ export {
   pruneLinesFromTop,
   pruneRawPromptFromTop,
   pruneStringFromBottom,
-  pruneStringFromTop,
+  pruneStringFromTop
 };
+

From 35b3189538da5891617a79ba9c0badb0bb7dc1bc Mon Sep 17 00:00:00 2001
From: Sahil
Date: Wed, 23 Apr 2025 17:39:48 +0530
Subject: [PATCH 3/3] perf(llm): Optimize pruneLines functions in countTokens

---
 core/llm/countTokens.ts | 45 ++++++++++++++++++++++++++++++++---------
 1 file changed, 36 insertions(+), 9 deletions(-)

diff --git a/core/llm/countTokens.ts b/core/llm/countTokens.ts
index 5bb4ff1987..265a928539 100644
--- a/core/llm/countTokens.ts
+++ b/core/llm/countTokens.ts
@@ -167,13 +167,28 @@ function pruneLinesFromTop(
   maxTokens: number,
   modelName: string,
 ): string {
-  let totalTokens = countTokens(prompt, modelName);
   const lines = prompt.split("\n");
-  while (totalTokens > maxTokens && lines.length > 0) {
-    totalTokens -= countTokens(lines.shift()!, modelName);
+  // Preprocess tokens for all lines and cache them.
+  const lineTokens = lines.map((line) => countTokens(line, modelName));
+  let totalTokens = lineTokens.reduce((sum, tokens) => sum + tokens, 0);
+  let start = 0;
+  let currentLines = lines.length;
+
+  // Calculate initial token count including newlines
+  totalTokens += Math.max(0, currentLines - 1); // Add tokens for joining newlines
+
+  // Using indexes instead of array modifications.
+  // Remove lines from the top until the token count is within the limit.
+  while (totalTokens > maxTokens && start < currentLines) {
+    totalTokens -= lineTokens[start];
+    // Decrement token count for the removed line and its preceding/joining newline (if not the last line)
+    if (currentLines - start > 1) {
+      totalTokens--;
+    }
+    start++;
   }
-  return lines.join("\n");
+  return lines.slice(start).join("\n");
 }
 
 function pruneLinesFromBottom(
@@ -181,13 +196,26 @@ function pruneLinesFromBottom(
   maxTokens: number,
   modelName: string,
 ): string {
-  let totalTokens = countTokens(prompt, modelName);
   const lines = prompt.split("\n");
-  while (totalTokens > maxTokens && lines.length > 0) {
-    totalTokens -= countTokens(lines.pop()!, modelName);
+  const lineTokens = lines.map((line) => countTokens(line, modelName));
+  let totalTokens = lineTokens.reduce((sum, tokens) => sum + tokens, 0);
+  let end = lines.length;
+
+  // Calculate initial token count including newlines
+  totalTokens += Math.max(0, end - 1); // Add tokens for joining newlines
+
+  // Reverse traversal to avoid array modification
+  // Remove lines from the bottom until the token count is within the limit.
+  while (totalTokens > maxTokens && end > 0) {
+    end--;
+    totalTokens -= lineTokens[end];
+    // Decrement token count for the removed line and its following/joining newline (if not the first line)
+    if (end > 0) {
+      totalTokens--;
+    }
   }
-  return lines.join("\n");
+  return lines.slice(0, end).join("\n");
 }
 
 function pruneStringFromBottom(
@@ -608,4 +636,3 @@ export {
   pruneStringFromBottom,
   pruneStringFromTop
 };
-