Skip to content

Commit

Permalink
add compose build with llamacpp
Browse files Browse the repository at this point in the history
Signed-off-by: cbh778899 <[email protected]>
  • Loading branch information
cbh778899 committed Jul 30, 2024
1 parent a280ab1 commit 59c40ba
Show file tree
Hide file tree
Showing 6 changed files with 106 additions and 6 deletions.
4 changes: 3 additions & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,6 @@ node_modules
package-lock.json
pnpm-lock.yaml
eslint.config.mjs
LICENSE
LICENSE
volumes
docker-compose.yaml
12 changes: 11 additions & 1 deletion .env
Original file line number Diff line number Diff line change
@@ -1 +1,11 @@
APP_PORT = 8000
APP_PORT=8000
INFERENCE_ENG=llamacpp
INFERENCE_ENG_PORT=8080
INFERENCE_ENG_VERSION=server--b1-2321a5e
NUM_CPU_CORES=8.00
NUM_CPU_CORES_EMBEDDING=4.00
EMBEDDING_ENG=embedding_eng
LANGUAGE_MODEL_NAME=Phi3-mini-4k-instruct-Q4.gguf
LANGUAGE_MODEL_URL=https://huggingface.co/aisuko/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi3-mini-4k-instruct-Q4.gguf?download=true
EMBEDDING_MODEL_NAME=all-MiniLM-L6-v2-Q4_K_M-v2.gguf
EMBEDDING_MODEL_URL=https://huggingface.co/aisuko/all-MiniLM-L6-v2-gguf/resolve/main/all-MiniLM-L6-v2-Q4_K_M-v2.gguf?download=true
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
node_modules
.git
volumes
6 changes: 4 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ WORKDIR /app
COPY . .

HEALTHCHECK --interval=300s --timeout=30s --start-period=5s --retries=3 CMD [ "node", "healthy-check.js" ]
RUN npm install -g pnpm && pnpm install
# RUN npm install -g pnpm && pnpm install
RUN npm install -g pnpm nodemon && pnpm install
EXPOSE 8000
ENTRYPOINT [ "npm", "start" ]
# ENTRYPOINT [ "npm", "start" ]
ENTRYPOINT [ "npm", "run", "dev" ]
41 changes: 39 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,12 +1,49 @@
# Project settings used by the single-container targets (build / run).
# PROJECT_NAME is passed to `docker run --name`; CONTAINER_NAME is the image
# tag given to `docker build -t` and the image that `docker run` starts.
PROJECT_NAME:=voyager
CONTAINER_NAME:=voyager:v0.1.0
# PORT is what the `run` recipe expands for `-p`; APP_PORT is the value the
# `env` target writes to the env file.
PORT:=8000
APP_PORT:=8000
# Compose-related settings. The `env` target writes each of the variables
# below (and APP_PORT) as KEY=value lines into ENV_FILE.
ENV_FILE:=.env
# INFERENCE_ENG is used by docker-compose.yaml as the llamacpp container name;
# INFERENCE_ENG_VERSION is the image tag of gclub/llama.cpp for both engines.
# INFERENCE_ENG_PORT is only written to the env file here; its consumer is not
# visible in this Makefile.
INFERENCE_ENG:=llamacpp
INFERENCE_ENG_PORT:=8080
INFERENCE_ENG_VERSION:=server--b1-2321a5e
# CPU reservations referenced by docker-compose.yaml (deploy.resources) for
# the language-model and embedding containers respectively.
NUM_CPU_CORES:=8.00
NUM_CPU_CORES_EMBEDDING:=4.00

# EMBEDDING_ENG is the embedding container name. The *_MODEL_NAME values are
# the model file names docker-compose.yaml passes to `-m models/<name>`.
# The *_MODEL_URL values are only written to the env file here; presumably a
# download step elsewhere consumes them -- TODO confirm.
EMBEDDING_ENG:=embedding_eng
LANGUAGE_MODEL_NAME:=Phi3-mini-4k-instruct-Q4.gguf
LANGUAGE_MODEL_URL:=https://huggingface.co/aisuko/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi3-mini-4k-instruct-Q4.gguf?download=true
EMBEDDING_MODEL_NAME:=all-MiniLM-L6-v2-Q4_K_M-v2.gguf
EMBEDDING_MODEL_URL:=https://huggingface.co/aisuko/all-MiniLM-L6-v2-gguf/resolve/main/all-MiniLM-L6-v2-Q4_K_M-v2.gguf?download=true

# Standalone service: build the image from the Dockerfile in the current
# directory and tag it as $(CONTAINER_NAME).
.PHONY: build
build:
	@docker build --tag $(CONTAINER_NAME) .

# Build the image, then start it attached as a throwaway container (--rm)
# named $(PROJECT_NAME), publishing the application port on the same host port.
# The port is taken from APP_PORT, the variable this Makefile defines for the
# application port and writes to the env file; expanding the old PORT name
# would yield an empty value and an invalid `-p :` argument.
.PHONY: run
run: build
	@docker run --rm -p $(APP_PORT):$(APP_PORT) --name $(PROJECT_NAME) $(CONTAINER_NAME)

# Compose workflow (llamacpp): regenerate $(ENV_FILE) from the variables
# defined at the top of this Makefile. The file is overwritten on every run
# and ends up with one KEY=value line per variable, in the order listed here.
.PHONY: env
env:
	@{ \
		echo "APP_PORT=$(APP_PORT)" && \
		echo "INFERENCE_ENG=$(INFERENCE_ENG)" && \
		echo "INFERENCE_ENG_PORT=$(INFERENCE_ENG_PORT)" && \
		echo "INFERENCE_ENG_VERSION=$(INFERENCE_ENG_VERSION)" && \
		echo "NUM_CPU_CORES=$(NUM_CPU_CORES)" && \
		echo "NUM_CPU_CORES_EMBEDDING=$(NUM_CPU_CORES_EMBEDDING)" && \
		echo "EMBEDDING_ENG=$(EMBEDDING_ENG)" && \
		echo "LANGUAGE_MODEL_NAME=$(LANGUAGE_MODEL_NAME)" && \
		echo "LANGUAGE_MODEL_URL=$(LANGUAGE_MODEL_URL)" && \
		echo "EMBEDDING_MODEL_NAME=$(EMBEDDING_MODEL_NAME)" && \
		echo "EMBEDDING_MODEL_URL=$(EMBEDDING_MODEL_URL)"; \
	} > $(ENV_FILE)

# Refresh the env file first (prerequisite `env`), then build the images
# declared in docker-compose.yaml.
.PHONY: compose-build
compose-build: env
	@docker compose --file docker-compose.yaml build

# Build via `compose-build`, then start every service from
# docker-compose.yaml in detached mode.
.PHONY: up
up: compose-build
	@docker compose --file docker-compose.yaml up --detach
48 changes: 48 additions & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Compose stack: two llama.cpp server containers (language model and
# embedding model) plus the voyager application container.
# ${...} values are substituted by Compose; presumably they come from the
# .env file next to this file -- TODO confirm.
services:
  # Language-model server; loads models/${LANGUAGE_MODEL_NAME}.
  llamacpp:
    container_name: ${INFERENCE_ENG}
    image: gclub/llama.cpp:${INFERENCE_ENG_VERSION}
    # Must be quoted: a bare `no` is a YAML boolean, not the string "no".
    restart: "no"
    deploy: # https://github.com/compose-spec/compose-spec/blob/master/deploy.md
      resources:
        reservations:
          cpus: "${NUM_CPU_CORES}"
    volumes:
      # Host ./volumes/models (or under DOCKER_VOLUME_DIRECTORY) -> /models.
      - "${DOCKER_VOLUME_DIRECTORY:-.}/volumes/models:/models"
    expose:
      - "8080"
    ports:
      # host:container, quoted so YAML never reinterprets the mapping.
      - "8080:8080"
    command: ["-m", "models/${LANGUAGE_MODEL_NAME}", "-c", "8192"]

  # Embedding server; same image, started with --embeddings and mean pooling.
  embedding_eng:
    container_name: ${EMBEDDING_ENG}
    image: gclub/llama.cpp:${INFERENCE_ENG_VERSION}
    restart: "no"
    deploy: # https://github.com/compose-spec/compose-spec/blob/master/deploy.md
      resources:
        reservations:
          cpus: "${NUM_CPU_CORES_EMBEDDING}"
    volumes:
      - "${DOCKER_VOLUME_DIRECTORY:-.}/volumes/models:/models"
    expose:
      - "8080"
    ports:
      # Published on host port 8082 because 8080 is taken by llamacpp.
      - "8082:8080"
    command: ["-m", "models/${EMBEDDING_MODEL_NAME}", "--embeddings", "--pooling", "mean", "-c", "512"]

  # The voyager application, built from the local Dockerfile.
  voyager:
    container_name: voyager
    restart: "no"
    build:
      dockerfile: Dockerfile
      context: .
    volumes:
      # Bind-mounts the project directory over /app.
      # NOTE(review): this hides whatever the image placed in /app at build
      # time (including installed dependencies) -- confirm that is intended.
      - .:/app
    expose:
      - "8000"
    ports:
      - "8000:8000"
    # Both inference containers are started before this one.
    depends_on:
      - llamacpp
      - embedding_eng

0 comments on commit 59c40ba

Please sign in to comment.