-
-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
implement api /v1/chat/completion (#3)
* add swagger-stats for monitor performance * ignore volumes when perform lint * fix name * update README * update env Signed-off-by: cbh778899 <[email protected]> * add needed environmental variables Signed-off-by: cbh778899 <[email protected]> * add function for inference requests Signed-off-by: cbh778899 <[email protected]> * move all api routes to separate function Signed-off-by: cbh778899 <[email protected]> * add post /completions Signed-off-by: cbh778899 <[email protected]> * add helper functions Signed-off-by: cbh778899 <[email protected]> --------- Signed-off-by: cbh778899 <[email protected]>
- Loading branch information
Showing
9 changed files
with
203 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
import { formatOpenAIContext } from "../tools/formatContext.js"; | ||
import { generateFingerprint } from "../tools/generator.js"; | ||
import { post } from "../tools/request.js"; | ||
|
||
/**
 * Builds an OpenAI-style chat completion payload with a single choice.
 * @param {String} id identifier placed in the `id` field of the payload
 * @param {String} object payload type, e.g. `chat.completion` or `chat.completion.chunk`
 * @param {String} model model name reported in the payload
 * @param {String} system_fingerprint fingerprint reported in the payload
 * @param {Boolean} stream when truthy the text is stored under `delta`, otherwise under `message`
 * @param {String} content assistant text for this payload
 * @param {Boolean} stopped when truthy `finish_reason` is `'stop'`, otherwise `null`
 * @returns {Object} the payload; non-stream payloads also carry a zeroed `usage` object
 */
function generateResponseContent(id, object, model, system_fingerprint, stream, content, stopped) {
    const resp = {
        id,
        object,
        // OpenAI-compatible clients expect a Unix timestamp in seconds,
        // while Date.now() yields milliseconds.
        created: Math.floor(Date.now() / 1000),
        model,
        system_fingerprint,
        choices: [{
            index: 0,
            [stream ? 'delta' : 'message']: {
                role: 'assistant',
                content
            },
            logprobs: null,
            finish_reason: stopped ? 'stop' : null
        }],
    };
    if(!stream) {
        // Token accounting is not available here, so usage is reported as zeros.
        resp.usage = {
            prompt_tokens: 0,
            completion_tokens: 0,
            total_tokens: 0
        };
    }
    return resp;
}
|
||
/**
 * Splits buffered engine output into the payloads of all complete lines.
 * The trailing, possibly incomplete, line is handed back as `rest` so the
 * caller can prepend it to the next chunk.
 * @param {String} buffer text received so far that has not been processed
 * @returns {{payloads: String[], rest: String}}
 */
function splitEnginePayloads(buffer) {
    const lines = buffer.split('\n');
    const rest = lines.pop();
    const payloads = lines
        .map((line) => stripDataPrefix(line))
        .filter((payload) => payload.length > 0);
    return { payloads, rest };
}

/**
 * Removes surrounding whitespace and a leading `data:` field name from a line.
 * @param {String} line
 * @returns {String}
 */
function stripDataPrefix(line) {
    const text = line.trim();
    return text.startsWith('data:') ? text.slice('data:'.length).trim() : text;
}

/**
 * Express handler for chat completion requests.
 *
 * Responds with 401 when no authorization value is supplied, 400 when
 * `messages` is not an array, and 500 when the inference engine request fails.
 * Otherwise the messages are turned into a prompt, forwarded to the engine's
 * `completion` endpoint and the answer is returned either as one JSON payload
 * or, when `stream` is set in the request body, as a sequence of chunk payloads.
 * @param {import("express").Request} req
 * @param {import("express").Response} res
 * @returns {Promise<void>}
 */
export async function chatCompletion(req, res) {
    const api_key = (req.headers.authorization || '').split('Bearer ').pop();
    if(!api_key) {
        res.status(401).send('Not Authorized');
        return;
    }

    // Validate before touching the engine: a missing body or `messages` would
    // otherwise throw inside this async handler and leave the request hanging.
    const { messages, ...request_body } = req.body || {};
    if(!Array.isArray(messages)) {
        res.status(400).send('Bad Request: `messages` must be an array');
        return;
    }

    const system_fingerprint = generateFingerprint();
    request_body.prompt = formatOpenAIContext(messages);
    const model = request_body.model || process.env.LANGUAGE_MODEL_NAME;

    try {
        if(request_body.stream) {
            const eng_resp = await post('completion', { body: request_body }, { getJSON: false });
            if(!eng_resp.ok || !eng_resp.body) {
                res.status(500).send('Inference Engine Error');
                return;
            }

            res.setHeader("Content-Type", "text/event-stream");
            res.setHeader("Cache-Control", "no-cache");
            res.setHeader("X-Accel-Buffering", "no");
            res.setHeader("Connection", "Keep-Alive");

            // NOTE(review): chunks are written as bare JSON followed by a blank line,
            // without the `data: ` field name the event-stream format uses. Kept as-is
            // so existing clients keep working — confirm whether it should be added.
            const writeChunk = (payload) => {
                const { content, stop } = JSON.parse(payload);
                res.write(JSON.stringify(generateResponseContent(api_key, 'chat.completion.chunk', model, system_fingerprint, true, content, stop))+'\n\n');
            };

            const reader = eng_resp.body.pipeThrough(new TextDecoderStream()).getReader();
            // A single read may carry several events or only part of one, so
            // buffer the text and only parse complete lines.
            let pending = '';
            while(true) {
                const { value, done } = await reader.read();
                if(done) break;
                const { payloads, rest } = splitEnginePayloads(pending + value);
                pending = rest;
                payloads.forEach(writeChunk);
            }
            // Flush a final event that was not terminated by a newline.
            const last_payload = stripDataPrefix(pending);
            if(last_payload) {
                writeChunk(last_payload);
            }
            res.end();
        } else {
            const eng_resp = await post('completion', { body: request_body });
            if(eng_resp.http_error) {
                res.status(500).send('Inference Engine Error');
                return;
            }
            const { model: engine_model, content } = eng_resp;
            const response_json = generateResponseContent(
                api_key, 'chat.completion', engine_model || model, system_fingerprint,
                false, content, true
            );
            res.send(response_json);
        }
    } catch(error) {
        console.error(error);
        if(res.headersSent) {
            // Part of the stream is already on the wire; all that is left is to close it.
            res.end();
        } else {
            res.status(500).send('Inference Engine Error');
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,10 @@ | ||
import { Router } from "express"; | ||
import { chatCompletion } from "../actions/inference.js"; | ||
|
||
/**
 * Creates the router for inference endpoints.
 * Registers `POST /completions`, handled by `chatCompletion`.
 * @returns the router with the inference routes attached
 */
export default function inferenceRoute() {
    const inference_router = Router();
    inference_router.post('/completions', chatCompletion);
    return inference_router;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
// System prompt placed at the top of every context built by formatInferenceContext.
const system_context = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions."

/**
 * Builds the prompt for the inference engine from a chat history and a new question.
 *
 * The result is the system prompt, one `### Human:` / `### Assistant:` line per
 * history entry, the new question as a `### Human:` line and a trailing
 * `### Assistant:` line, all separated by newlines.
 * @param {Array<{role: String, message: String}>} history previous turns; entries whose
 * role is `user` are labelled `Human`, every other role is labelled `Assistant`
 * @param {String} question the new question
 * @returns {String} the formatted prompt
 */
export function formatInferenceContext(history, question) {
    const turns = (history ?? []).map(({role, message}) => {
        return `### ${role === 'user' ? 'Human' : 'Assistant'}: ${message || ''}`;
    });
    // Joining every part with a newline keeps the first history turn from being
    // glued directly onto the end of the system prompt.
    return [
        system_context,
        ...turns,
        `### Human: ${question}`,
        '### Assistant:'
    ].join('\n');
}
|
||
/**
 * Turns OpenAI-style chat messages into a single prompt string.
 * Each message becomes a `### <role>: <content>` line and a final
 * `### assistant:` line is appended.
 * @param {Array<{role: String, content: String}>} messages
 * @returns {String} the formatted prompt
 */
export function formatOpenAIContext(messages) {
    const renderTurn = (turn) => {
        const { role, content } = turn;
        return `### ${role}: ${content}`;
    };
    const turns = messages.map((turn) => renderTurn(turn));
    return `${turns.join("\n")}\n### assistant:`;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
/**
 * Produces a short random string of base-32 digits (`0-9`, `a-v`).
 * The value comes from `Math.random()`, which is not cryptographically secure.
 * @returns {String}
 */
export function generateRandomString() {
    const base32 = Math.random().toString(32);
    // Drop the leading "0." of the fractional representation.
    return base32.slice(2);
}
|
||
/**
 * Produces a fingerprint: the prefix `fp_` followed by a random string.
 * @returns {String}
 */
export function generateFingerprint() {
    const suffix = generateRandomString();
    return `fp_${suffix}`;
}
|
||
/**
 * Produces an API key: the prefix `voy-` followed by four random strings
 * concatenated without a separator.
 * @returns {String}
 */
export function generateAPIKey() {
    let key = 'voy-';
    for (let part = 0; part < 4; part++) {
        key += generateRandomString();
    }
    return key;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
// Formats the http base URL of an engine from its host and port.
const engineUrl = (host, port) => `http://${host}:${port}`;

// Base URLs of the backend engines, keyed by engine name. Host and port are
// read from the environment and fall back to the defaults given here.
const BASE_URL = {
    chat: engineUrl(
        process.env.INFERENCE_ENG || 'llamacpp',
        process.env.INFERENCE_ENG_PORT || 8080
    ),
    rag: engineUrl(
        process.env.EMBEDDING_ENG || 'embedding_eng',
        process.env.EMBEDDING_ENG_PORT || 8081
    ),
};

// Fetch options applied to every request unless the caller overrides them.
const default_options = {
    headers: { 'Content-Type': 'application/json' },
};
|
||
/** | ||
* @typedef RequestOptions | ||
* @property {"rag"|"chat"} eng select between rag engine or chat engine, default value is `chat` | ||
* @property {Boolean} getJSON | ||
* * If set to `true`, this function will return the result of `await(await fetch(...)).json();` | ||
* or the object `{ http_error: true }` instead when the response status is not OK. | ||
* * If set to `false`, this function will return the result of `await fetch(...);`, without error handling | ||
* * default value is `true`; | ||
*/ | ||
|
||
/**
 * A wrapper around the native fetch api that fills in the engine base url and
 * the default headers.
 *
 * Headers passed in `options.headers` are merged over the default headers, so
 * supplying extra headers no longer drops the default `Content-Type`. A truthy
 * `options.body` is serialized with `JSON.stringify`.
 * @param {String} url The path to request on the selected engine, with or without a leading `/`
 * @param {RequestInit} options the options to init request
 * @param {RequestOptions} request_options extra options to be included
 * @returns {Promise<Response|Object|{http_error: true}>} the raw response when
 * `getJSON` is `false`; otherwise the parsed JSON body, or `{ http_error: true }`
 * when the response status is not OK
 */
export default async function request(url, options={}, request_options={}) {
    const eng = request_options.eng || "chat";
    const getJSON = Object.hasOwn(request_options, 'getJSON') ? request_options.getJSON : true;

    // `cond && '/'` evaluates to `false` for paths that already start with a
    // slash, which a template literal renders as the text "false".
    const separator = url.startsWith('/') ? '' : '/';
    const full_url = `${BASE_URL[eng]}${separator}${url}`;

    // Merge header by header; a plain object spread would replace the default
    // headers wholesale as soon as the caller passes any of their own.
    const headers = new Headers(default_options.headers);
    new Headers(options?.headers).forEach((value, name) => headers.set(name, value));

    const init = {
        ...default_options,
        ...options,
        headers
    };

    if(init.body) {
        init.body = JSON.stringify(init.body);
    }

    const res = await fetch(full_url, init);
    if(!getJSON) {
        return res;
    }
    return res.ok ? await res.json() : { http_error: true };
}
|
||
/**
 * Shortcut for {@link request} that sends a `GET` request.
 * A `method` given in `options` takes precedence over the `GET` default.
 * @param {String} url The url to send request
 * @param {RequestInit} options the options to init request
 * @param {RequestOptions} request_options extra options to be included
 * @returns {Promise<Response>|Object|{http_error: true}}
 */
export function get(url, options, request_options) {
    const init = { method: 'GET', ...options };
    return request(url, init, request_options);
}
|
||
/**
 * Shortcut for {@link request} that sends a `POST` request.
 * A `method` given in `options` takes precedence over the `POST` default.
 * @param {String} url The url to send request
 * @param {RequestInit} options the options to init request
 * @param {RequestOptions} request_options extra options to be included
 * @returns {Promise<Response>|Object|{http_error: true}}
 */
export function post(url, options, request_options) {
    const init = { method: 'POST', ...options };
    return request(url, init, request_options);
}