-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add taskId and checkpointNumber parameters to completePrompt and rela… #43
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,19 +20,62 @@ export class KiloCodeHandler extends BaseProvider implements SingleCompletionHan | |
}) | ||
} | ||
|
||
async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream { | ||
private getIdempotencyKey(taskId: string, checkpointNumber: number): string { | ||
// Create a deterministic idempotency key based on task_id and checkpoint number | ||
return `${taskId}-${checkpointNumber}` | ||
} | ||
|
||
async *createMessage( | ||
systemPrompt: string, | ||
messages: Anthropic.Messages.MessageParam[], | ||
taskId?: string, | ||
checkpointNumber?: number, | ||
): ApiStream { | ||
let stream: AnthropicStream<Anthropic.Messages.RawMessageStreamEvent> | ||
const cacheControl: CacheControlEphemeral = { type: "ephemeral" } | ||
let { id: modelId, maxTokens, thinking, temperature, virtualId } = this.getModel() | ||
const { id: modelId, maxTokens, thinking, temperature, virtualId } = this.getModel() | ||
|
||
const userMsgIndices = messages.reduce( | ||
(acc, msg, index) => (msg.role === "user" ? [...acc, index] : acc), | ||
[] as number[], | ||
) | ||
// Use a for loop instead of reduce with spread to avoid linting error | ||
const userMsgIndices: number[] = [] | ||
for (let i = 0; i < messages.length; i++) { | ||
if (messages[i].role === "user") { | ||
userMsgIndices.push(i) | ||
} | ||
} | ||
|
||
const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1 | ||
const secondLastMsgUserIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1 | ||
|
||
// Prepare request options with headers | ||
const requestOptions: { headers: Record<string, string> } = (() => { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Either this typing here or the one down on 64 is redundant, making it harder to maintain. |
||
const betas: string[] = [] | ||
|
||
// Check for models that support prompt caching | ||
switch (modelId) { | ||
case "claude-3-7-sonnet-20250219": | ||
case "claude-3-5-sonnet-20241022": | ||
case "claude-3-5-haiku-20241022": | ||
case "claude-3-opus-20240229": | ||
case "claude-3-haiku-20240307": | ||
betas.push("prompt-caching-2024-07-31") | ||
break | ||
} | ||
|
||
const headers: Record<string, string> = {} | ||
|
||
// Add beta features if any | ||
if (betas.length > 0) { | ||
headers["anthropic-beta"] = betas.join(",") | ||
} | ||
|
||
// Add idempotency key if task_id and checkpoint number are provided | ||
if (taskId && checkpointNumber !== undefined) { | ||
headers["idempotency-key"] = this.getIdempotencyKey(taskId, checkpointNumber) | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the AI way of doing it, the more efficient (preventing multiple object mutations) and readable way (imho) to do it would be like this: const headers = {
'anthropic-beta': betas.length > 0 ? betas.join(",") : undefined,
'idempotency-key': taskId && checkpointNumber !== undefined ? this.getIdempotencyKey(taskId, checkpointNumber) : undefined,
} |
||
|
||
return { headers } | ||
})() | ||
|
||
stream = await this.client.messages.create( | ||
{ | ||
model: modelId, | ||
|
@@ -62,38 +105,12 @@ export class KiloCodeHandler extends BaseProvider implements SingleCompletionHan | |
// tools: tools, | ||
stream: true, | ||
}, | ||
(() => { | ||
// prompt caching: https://x.com/alexalbert__/status/1823751995901272068 | ||
// https://github.com/anthropics/anthropic-sdk-typescript?tab=readme-ov-file#default-headers | ||
// https://github.com/anthropics/anthropic-sdk-typescript/commit/c920b77fc67bd839bfeb6716ceab9d7c9bbe7393 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These comments seem useful. Preserve them? |
||
|
||
const betas = [] | ||
|
||
// // Check for the thinking-128k variant first | ||
// if (virtualId === "claude-3-7-sonnet-20250219:thinking") { | ||
// betas.push("output-128k-2025-02-19") | ||
// } | ||
|
||
// Then check for models that support prompt caching | ||
switch (modelId) { | ||
case "claude-3-7-sonnet-20250219": | ||
case "claude-3-5-sonnet-20241022": | ||
case "claude-3-5-haiku-20241022": | ||
case "claude-3-opus-20240229": | ||
case "claude-3-haiku-20240307": | ||
betas.push("prompt-caching-2024-07-31") | ||
return { | ||
headers: { "anthropic-beta": betas.join(",") }, | ||
} | ||
default: | ||
return undefined | ||
} | ||
})(), | ||
requestOptions, | ||
) | ||
|
||
for await (const chunk of stream) { | ||
switch (chunk.type) { | ||
case "message_start": | ||
case "message_start": { | ||
// Tells us cache reads/writes/input/output. | ||
const usage = chunk.message.usage | ||
|
||
|
@@ -106,6 +123,7 @@ export class KiloCodeHandler extends BaseProvider implements SingleCompletionHan | |
} | ||
|
||
break | ||
} | ||
case "message_delta": | ||
// Tells us stop_reason, stop_sequence, and output tokens | ||
// along the way and at the end of the message. | ||
|
@@ -174,17 +192,30 @@ export class KiloCodeHandler extends BaseProvider implements SingleCompletionHan | |
} | ||
} | ||
|
||
async completePrompt(prompt: string) { | ||
let { id: modelId, temperature } = this.getModel() | ||
async completePrompt(prompt: string, taskId?: string, checkpointNumber?: number) { | ||
const { id: modelId, temperature } = this.getModel() | ||
|
||
const message = await this.client.messages.create({ | ||
model: modelId, | ||
max_tokens: ANTHROPIC_DEFAULT_MAX_TOKENS, | ||
thinking: undefined, | ||
temperature, | ||
messages: [{ role: "user", content: prompt }], | ||
stream: false, | ||
}) | ||
// Prepare request options with headers | ||
const requestOptions: { headers: Record<string, string> } = { | ||
headers: {}, | ||
} | ||
|
||
// Add idempotency key if task_id and checkpoint number are provided | ||
if (taskId && checkpointNumber !== undefined) { | ||
requestOptions.headers["idempotency-key"] = this.getIdempotencyKey(taskId, checkpointNumber) | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe write this a bit more compactly? It feels like "a lot" in the way it's spread over so many lines, while in reality almost nothing is happening here. Code "weight" should feel proportional to the weight of the content. |
||
|
||
const message = await this.client.messages.create( | ||
{ | ||
model: modelId, | ||
max_tokens: ANTHROPIC_DEFAULT_MAX_TOKENS, | ||
thinking: undefined, | ||
temperature, | ||
messages: [{ role: "user", content: prompt }], | ||
stream: false, | ||
}, | ||
requestOptions, | ||
) | ||
|
||
const content = message.content.find(({ type }) => type === "text") | ||
return content?.type === "text" ? content.text : "" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1162,7 +1162,11 @@ export class Cline { | |
} | ||
return { role, content } | ||
}) | ||
const stream = this.api.createMessage(systemPrompt, cleanConversationHistory) | ||
// Get the current checkpoint number for idempotency key generation | ||
const checkpointNumber = this.clineMessages.filter(({ say }) => say === "checkpoint_saved").length | ||
|
||
// Pass task_id and checkpoint number to the API for idempotency key generation | ||
const stream = this.api.createMessage(systemPrompt, cleanConversationHistory, this.taskId, checkpointNumber) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same point about weight. Remove those comments. |
||
const iterator = stream[Symbol.asyncIterator]() | ||
|
||
try { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No need for this comment. This could've been a self-comment on this PR instead.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also: If you convert this to a
for .. of
loop, it complies with modern JS best practice, is more readable, and removes the need for the comment