-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDockerfile.gpu
More file actions
65 lines (52 loc) · 2.69 KB
/
Dockerfile.gpu
File metadata and controls
65 lines (52 loc) · 2.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# syntax=docker/dockerfile:1
# CoDRAG Headless — GPU Image
# For serverless GPU providers (RunPod, Modal, AWS SageMaker) using local LLMs.
# Includes Ollama runtime + pre-baked Qwen3:4b model weights.
# No API keys needed — fully private, fully offline.
#
# Build:
# docker build -f Dockerfile.gpu -t codrag/headless:gpu .
#
# Usage:
# codrag sync-headless \
# --repo-path /workspace \
# --model-provider local --model-name qwen3:4b \
# --embedder native
FROM nvidia/cuda:12.4.1-runtime-ubuntu22.04 AS base

# Build-time only: ARG (not ENV) so "noninteractive" does not leak into the
# runtime environment of containers started from this image.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime configuration consumed by Ollama and Python at container start.
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    OLLAMA_HOST=127.0.0.1:11434 \
    OLLAMA_MODELS=/opt/ollama-models

# pipefail so piped RUN commands (e.g. curl | sh below) fail loudly (DL4006).
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# System dependencies (sorted for diffability; single layer with list cleanup).
RUN apt-get update && apt-get install -y --no-install-recommends \
      ca-certificates \
      curl \
      git \
      openssh-client \
      python3.11 \
      python3.11-venv \
      python3-pip \
    && rm -rf /var/lib/apt/lists/*

RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 \
    && update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1

# ── Install Ollama ────────────────────────────────────────────
# NOTE(review): piping an unpinned remote installer is not reproducible and
# trusts the endpoint at build time — consider pinning an OLLAMA_VERSION and
# verifying a checksum of the release tarball instead.
RUN curl -fsSL https://ollama.com/install.sh | sh

# ── Pre-bake the default model ────────────────────────────────
# Baking into the image avoids a multi-GB download on every serverless cold start.
# qwen3:4b (~2.5 GB) is the default fast model for headless indexing.
# Poll the API for readiness instead of a fixed sleep (races on slow builders),
# and kill by PID: "%1" job control is unavailable in non-interactive RUN
# shells, so the original "kill %1" silently failed.
RUN ollama serve & \
    OLLAMA_PID=$! && \
    timeout 60 bash -c 'until curl -fsS "http://${OLLAMA_HOST}/api/version" >/dev/null 2>&1; do sleep 1; done' && \
    ollama pull qwen3:4b && \
    kill "$OLLAMA_PID" || true

# ── Install CoDRAG ───────────────────────────────────────────
# Dependency manifest first, then sources, to maximize layer-cache reuse.
WORKDIR /opt/codrag
COPY pyproject.toml .
COPY src/ src/
COPY engine/ engine/
# CoDRAG + S3 sync dependencies (boto3, minio) installed in one layer.
RUN pip install --no-cache-dir ".[headless]" boto3 minio

# ── Pre-download ONNX embedding model ────────────────────────
# Best-effort: a failure here only means the model downloads on first run.
RUN python -c "from codrag.core.embedder import NativeEmbedder; NativeEmbedder().download_model()" \
    || echo "WARN: Model pre-download skipped (will download on first run)"

# ── Default entrypoint ────────────────────────────────────────
# The entrypoint script starts Ollama in the background, then runs codrag.
# NOTE(review): image runs as root — acceptable for single-tenant serverless
# GPU workers, but consider a dedicated user with write access to
# /opt/ollama-models and /opt/codrag if the runtime permits it.
COPY public/codrag-deploy/entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]
CMD ["sync-headless", "--help"]