Skip to content

Commit

Permalink
format max_tokens in openai to n_predict in llamacpp
Browse files Browse the repository at this point in the history
Signed-off-by: cbh778899 <[email protected]>
  • Loading branch information
cbh778899 committed Aug 1, 2024
1 parent 9fa50c4 commit f5bdf2d
Showing 1 changed file with 10 additions and 2 deletions.
12 changes: 10 additions & 2 deletions actions/inference.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,16 +44,24 @@ function generateResponseContent(id, object, model, system_fingerprint, stream,
return resp;
}

// Fallback stop sequences: copied into the request body when the caller
// does not supply its own `stop` value.
const default_stop_keywords = [
    '### user:',
];

export async function chatCompletion(req, res) {
const api_key = (req.headers.authorization || '').split('Bearer ').pop();
if(!api_key) {
res.status(401).send('Not Authorized');
return;
}

const system_fingerprint = generateFingerprint();
let {messages, ...request_body} = req.body;
let {messages, max_tokens, ...request_body} = req.body;

// convert the OpenAI-style request body into llama.cpp's input format
request_body.prompt = formatOpenAIContext(messages);
if(max_tokens) request_body.n_predict = max_tokens;
if(!request_body.stop) request_body.stop = [...default_stop_keywords];

// extra
const system_fingerprint = generateFingerprint();
const model = request_body.model || process.env.LANGUAGE_MODEL_NAME

if(request_body.stream) {
Expand Down

0 comments on commit f5bdf2d

Please sign in to comment.