Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update setup scripts #10

Merged
merged 5 commits into from
Aug 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .env
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
APP_PORT=8000
ENG_ACCESS_PORT=8080
MODEL_SAVE_PATH=volumes/models
INFERENCE_ENG=llamacpp
INFERENCE_ENG_PORT=8080
INFERENCE_ENG_VERSION=server--b1-2321a5e
Expand Down
12 changes: 11 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ APP_PORT:=8000
# compose build related
# File that the `env` target writes the KEY=VALUE settings into.
ENV_FILE:=.env

# Container-side port of the engine services in docker-compose.yaml
# (listed under `expose` and used as the target of their port mappings).
ENG_ACCESS_PORT:=8080
# Directory the model files are downloaded into by `model-prepare`;
# docker-compose.yaml mounts it at /models in the engine containers.
MODEL_SAVE_PATH:=volumes/models

# Inference engine: compose container name, host-side published port,
# and tag of the gclub/llama.cpp image.
INFERENCE_ENG:=llamacpp
INFERENCE_ENG_PORT:=8080
INFERENCE_ENG_VERSION:=server--b1-2321a5e
Expand Down Expand Up @@ -32,6 +35,8 @@ run: build
.PHONY: env
env:
@echo "APP_PORT=$(APP_PORT)"> $(ENV_FILE)
@echo "ENG_ACCESS_PORT=$(ENG_ACCESS_PORT)">> $(ENV_FILE)
@echo "MODEL_SAVE_PATH=$(MODEL_SAVE_PATH)">> $(ENV_FILE)
@echo "INFERENCE_ENG=$(INFERENCE_ENG)">> $(ENV_FILE)
@echo "INFERENCE_ENG_PORT=$(INFERENCE_ENG_PORT)">> $(ENV_FILE)
@echo "INFERENCE_ENG_VERSION=$(INFERENCE_ENG_VERSION)">> $(ENV_FILE)
Expand All @@ -44,8 +49,13 @@ env:
@echo "EMBEDDING_MODEL_NAME=$(EMBEDDING_MODEL_NAME)">> $(ENV_FILE)
@echo "EMBEDDING_MODEL_URL=$(EMBEDDING_MODEL_URL)">> $(ENV_FILE)

# Download the language and embedding model files into $(MODEL_SAVE_PATH).
# A model file that already exists is left untouched. Each download is written
# to a ".part" file and renamed only after wget succeeds, so an interrupted or
# failed transfer is never mistaken for a complete model on the next run.
.PHONY: model-prepare
model-prepare:
	@mkdir -p "$(MODEL_SAVE_PATH)"
	@[ -f "$(MODEL_SAVE_PATH)/$(LANGUAGE_MODEL_NAME)" ] || \
		{ wget -O "$(MODEL_SAVE_PATH)/$(LANGUAGE_MODEL_NAME).part" "$(LANGUAGE_MODEL_URL)" && \
		  mv -f "$(MODEL_SAVE_PATH)/$(LANGUAGE_MODEL_NAME).part" "$(MODEL_SAVE_PATH)/$(LANGUAGE_MODEL_NAME)"; }
	@[ -f "$(MODEL_SAVE_PATH)/$(EMBEDDING_MODEL_NAME)" ] || \
		{ wget -O "$(MODEL_SAVE_PATH)/$(EMBEDDING_MODEL_NAME).part" "$(EMBEDDING_MODEL_URL)" && \
		  mv -f "$(MODEL_SAVE_PATH)/$(EMBEDDING_MODEL_NAME).part" "$(MODEL_SAVE_PATH)/$(EMBEDDING_MODEL_NAME)"; }

# Build the images described in docker-compose.yaml. The prerequisites run
# first: `env` rewrites $(ENV_FILE) and `model-prepare` fetches any model
# file that is not present yet.
.PHONY: compose-build
compose-build: env
compose-build: env model-prepare
@docker compose -f docker-compose.yaml build

.PHONY: up
Expand Down
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ This project is OpenAI-like API set for SkywardAI project.
## BUILD & RUN

### Local Machine
* Please make sure you installed `Node.js` on your local machine.
* This project was developed on Node.js version `v20.15.0`.
* Make sure you have `Node.js` installed.

```shell
# Manage package by pnpm
Expand All @@ -22,7 +22,8 @@ npm run
```

### Container
**Please make sure you have `docker` and `make` installed in your server**
* Please make sure you have `docker` and `make` installed on your server.
* Docker version for testing is `27.0.3, build 7d4bcd8`.
```shell
# to simply start with all needed containers started, please run
make up
Expand All @@ -40,4 +41,4 @@ npm run lint
```

## Monitor
This project got monitor build with swagger-stats, when you got this project running, just go to `<Your Server>:<Your Port>/swagger-stats`
This project has a monitor built with swagger-stats. Once the project is running, go to `<Your Server>:<Your Port>/stats`.
15 changes: 15 additions & 0 deletions actions/inference.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
// coding=utf-8

// Copyright [2024] [SkywardAI]
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { formatOpenAIContext } from "../tools/formatContext.js";
import { generateFingerprint } from "../tools/generator.js";
import { post } from "../tools/request.js";
Expand Down
22 changes: 11 additions & 11 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,47 +2,47 @@ services:
llamacpp:
container_name: ${INFERENCE_ENG}
image: gclub/llama.cpp:${INFERENCE_ENG_VERSION}
restart: no
restart: always
deploy: # https://github.com/compose-spec/compose-spec/blob/master/deploy.md
resources:
reservations:
cpus: "${NUM_CPU_CORES}"
volumes:
- "${DOCKER_VOLUME_DIRECTORY:-.}/volumes/models:/models"
- "${DOCKER_VOLUME_DIRECTORY:-.}/${MODEL_SAVE_PATH}:/models"
expose:
- 8080
- ${ENG_ACCESS_PORT}
ports:
- ${INFERENCE_ENG_PORT}:8080
- ${INFERENCE_ENG_PORT}:${ENG_ACCESS_PORT}
command: ["-m", "models/${LANGUAGE_MODEL_NAME}","-c","8192"]

embedding_eng:
container_name: ${EMBEDDING_ENG}
image: gclub/llama.cpp:${INFERENCE_ENG_VERSION}
restart: no
restart: always
deploy: # https://github.com/compose-spec/compose-spec/blob/master/deploy.md
resources:
reservations:
cpus: "${NUM_CPU_CORES_EMBEDDING}"
volumes:
- "${DOCKER_VOLUME_DIRECTORY:-.}/volumes/models:/models"
- "${DOCKER_VOLUME_DIRECTORY:-.}/${MODEL_SAVE_PATH}:/models"
expose:
- 8080
- ${ENG_ACCESS_PORT}
ports:
- ${EMBEDDING_ENG_PORT}:8080
- ${EMBEDDING_ENG_PORT}:${ENG_ACCESS_PORT}
command: ["-m", "models/${EMBEDDING_MODEL_NAME}","--embeddings","--pooling","mean","-c","512"]

voyager:
container_name: voyager
restart: no
restart: always
build:
dockerfile: Dockerfile
context: .
volumes:
- .:/app
expose:
- 8000
- ${APP_PORT}
ports:
- 8000:8000
- ${APP_PORT}:${APP_PORT}
depends_on:
- llamacpp
- embedding_eng
15 changes: 15 additions & 0 deletions healthy-check.js
Original file line number Diff line number Diff line change
@@ -1,2 +1,17 @@
// coding=utf-8

// Copyright [2024] [SkywardAI]
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Probe the local /healthy endpoint and report the result.
// fetch() only rejects on network-level failures, so the HTTP status must be
// checked explicitly; otherwise a 4xx/5xx answer would still count as healthy.
const response = await fetch('http://localhost:8000/healthy');
if (!response.ok) {
    console.error(`Healthy check failed with status ${response.status}.`);
    // Non-zero exit code so whatever runs this script can detect the failure.
    process.exit(1);
}
console.log('Healthy check passed.')
18 changes: 17 additions & 1 deletion index.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
// coding=utf-8

// Copyright [2024] [SkywardAI]
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import express from 'express';
import cors from 'cors';
import bodyParser from 'body-parser';
Expand All @@ -13,7 +28,8 @@ const app = express();
app.use(cors());
app.use(bodyParser.json());
app.use(swStats.getMiddleware({
name: "Voyager Swagger Monitor"
name: "Voyager Swagger Monitor",
uriPath: '/stats'
}))

buildRoutes(app);
Expand Down
15 changes: 15 additions & 0 deletions routes/decoder.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
// coding=utf-8

// Copyright [2024] [SkywardAI]
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { Router } from "express";

export default function decoderRoute() {
Expand Down
15 changes: 15 additions & 0 deletions routes/embedding.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
// coding=utf-8

// Copyright [2024] [SkywardAI]
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { Router } from "express";

export default function embeddingRoute() {
Expand Down
15 changes: 15 additions & 0 deletions routes/encoder.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
// coding=utf-8

// Copyright [2024] [SkywardAI]
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { Router } from "express";

export default function encoderRoute() {
Expand Down
15 changes: 15 additions & 0 deletions routes/index.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
// coding=utf-8

// Copyright [2024] [SkywardAI]
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { Router } from "express";

import inferenceRoute from "./inference.js";
Expand Down
15 changes: 15 additions & 0 deletions routes/inference.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
// coding=utf-8

// Copyright [2024] [SkywardAI]
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { Router } from "express";
import { chatCompletion } from "../actions/inference.js";

Expand Down
15 changes: 15 additions & 0 deletions routes/token.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
// coding=utf-8

// Copyright [2024] [SkywardAI]
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { Router } from "express";
import { generateAPIKey } from "../tools/generator.js";

Expand Down
15 changes: 15 additions & 0 deletions routes/tracing.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
// coding=utf-8

// Copyright [2024] [SkywardAI]
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { Router } from "express";

export default function tracingRoute() {
Expand Down
15 changes: 15 additions & 0 deletions tools/formatContext.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
// coding=utf-8

// Copyright [2024] [SkywardAI]
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

const system_context = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions."

export function formatInferenceContext(history, question) {
Expand Down
15 changes: 15 additions & 0 deletions tools/generator.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
// coding=utf-8

// Copyright [2024] [SkywardAI]
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
 * Build a short pseudo-random string from the base-32 digits (0-9, a-v)
 * of a single Math.random() sample.
 *
 * NOTE(review): Math.random() is not a cryptographically secure source;
 * confirm this value is never relied on as a secret.
 *
 * @returns {string} the base-32 fractional digits of the sample
 */
export function generateRandomString() {
    const base32Sample = Math.random().toString(32);
    // Strip the leading "0." so only the fractional digits remain.
    return base32Sample.slice(2);
}
Expand Down
15 changes: 15 additions & 0 deletions tools/request.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
// coding=utf-8

// Copyright [2024] [SkywardAI]
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

const BASE_URL = {
"chat": `http://${process.env.INFERENCE_ENG || 'llamacpp'}:${process.env.INFERENCE_ENG_PORT || 8080}`,
"rag": `http://${process.env.EMBEDDING_ENG || 'embedding_eng'}:${process.env.EMBEDDING_ENG_PORT || 8081}`
Expand Down
Loading