Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ docker-compose.override.yml
*.db
*.sqlite
*.sqlite3
data/


# Cache and temporary files
.cache/
Expand Down Expand Up @@ -94,12 +94,13 @@ docs/.vitepress/dist/
docs/.vitepress/cache/

# Generated Swagger documentation
internal/handlers/swagger/docs.go
internal/handlers/swagger/swagger.json
internal/handlers/swagger/swagger.yaml
internal/api/handlers/swagger/docs.go
internal/api/handlers/swagger/swagger.json
internal/api/handlers/swagger/swagger.yaml

# Embedded documentation dist
internal/docs/dist/
internal/api/docs/dist/
internal/api/ui/dist/

# Claude specific files
.claude/
Expand Down
13 changes: 6 additions & 7 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ COPY docs/ ./
RUN npm run build

# Go Build stage
FROM golang:1.23-alpine AS builder
FROM golang:1.24-alpine AS builder

# Install build dependencies
RUN apk add --no-cache git gcc musl-dev
Expand All @@ -61,14 +61,14 @@ RUN go mod download
COPY . .

# Copy built UI from ui-builder stage
COPY --from=ui-builder /app/web/dist ./internal/ui/dist
COPY --from=ui-builder /app/web/dist ./internal/api/ui/dist

# Copy built docs from docs-builder stage
COPY --from=docs-builder /app/docs/.vitepress/dist ./internal/docs/dist
COPY --from=docs-builder /app/docs/.vitepress/dist ./internal/api/docs/dist

# Generate Swagger documentation
RUN go install github.com/swaggo/swag/cmd/swag@latest
RUN swag init -g cmd/server/main.go -o internal/handlers/swagger
RUN swag init -g cmd/server/main.go -o internal/api/handlers/swagger

# Build the application with embedded UI
RUN CGO_ENABLED=1 GOOS=linux go build -a -installsuffix cgo -o pllm cmd/server/main.go
Expand All @@ -88,13 +88,12 @@ WORKDIR /app

# Copy binary from builder
COPY --from=builder /app/pllm .
COPY --from=builder /app/docs ./docs

# Copy config file (if exists)
COPY --chown=pllm:pllm config.yaml* ./

# Copy pricing file
COPY --from=builder --chown=pllm:pllm /app/internal/config/model_prices_and_context_window.json ./internal/config/
COPY --from=builder --chown=pllm:pllm /app/internal/core/config/model_prices_and_context_window.json ./internal/core/config/

# Change ownership
RUN chown -R pllm:pllm /app
Expand All @@ -110,4 +109,4 @@ HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
CMD wget --no-verbose --tries=1 --spider http://localhost:8080/health || exit 1

# Run the application
ENTRYPOINT ["./pllm"]
ENTRYPOINT ["./pllm"]
40 changes: 24 additions & 16 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ web-build: ## Build frontend assets
cd web && npm run build

.PHONY: ui-build
ui-build: web-build ## Copy built frontend to internal/ui/dist for embedding
@mkdir -p internal/ui/dist
@cp -r web/dist/* internal/ui/dist/
@echo "✅ Frontend copied to internal/ui/dist/"
ui-build: web-build ## Copy built frontend to internal/api/ui/dist for embedding
@mkdir -p internal/api/ui/dist
@cp -r web/dist/* internal/api/ui/dist/
@echo "✅ Frontend copied to internal/api/ui/dist/"

.PHONY: build-worker
build-worker: ## Build the worker binary for background processing
Expand Down Expand Up @@ -336,24 +336,32 @@ redis-shell: ## Open Redis shell

.PHONY: test
test: swagger ## Run tests (generates swagger docs first)
mkdir -p internal/ui/dist
mkdir -p internal/docs/dist
touch internal/ui/dist/index.html
touch internal/docs/dist/index.html
mkdir -p internal/api/ui/dist
mkdir -p internal/api/docs/dist
touch internal/api/ui/dist/index.html
touch internal/api/docs/dist/index.html
go test -v ./...

.PHONY: test-coverage
test-coverage: swagger ## Run tests with coverage
mkdir -p internal/ui/dist
mkdir -p internal/docs/dist
touch internal/ui/dist/index.html
touch internal/docs/dist/index.html
mkdir -p internal/api/ui/dist
mkdir -p internal/api/docs/dist
touch internal/api/ui/dist/index.html
touch internal/api/docs/dist/index.html
go test -v -cover -coverprofile=coverage.txt ./...

.PHONY: test-integration
test-integration: ## Run integration tests
go test -v -tags=integration ./...

.PHONY: test-failover
test-failover: ## Run failover and performance integration tests
go test -v -timeout=60s ./internal/services/integration/ -run="Test"

.PHONY: test-performance
test-performance: ## Run performance benchmarks and validate banking requirements
go test -v -timeout=60s ./internal/services/integration/ -run="TestPerformanceBenchmarks"

.PHONY: lint
lint: ## Run linter
@which golangci-lint > /dev/null || go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest
Expand All @@ -375,7 +383,7 @@ install-tools: ## Install development tools
.PHONY: swagger
swagger: ## Generate Swagger documentation
@which swag > /dev/null || go install github.com/swaggo/swag/cmd/swag@latest
swag init -g cmd/server/main.go -o internal/handlers/swagger
swag init -g cmd/server/main.go -o internal/api/handlers/swagger

##@ Documentation

Expand All @@ -386,16 +394,16 @@ docs-dev: ## Run VitePress documentation in development mode
.PHONY: docs-build
docs-build: ## Build VitePress documentation
cd docs && npm run build
mkdir -p internal/docs/dist
cp -r docs/.vitepress/dist/* internal/docs/dist/
mkdir -p internal/api/docs/dist
cp -r docs/.vitepress/dist/* internal/api/docs/dist/

.PHONY: docs-preview
docs-preview: ## Preview built documentation
cd docs && npm run preview

.PHONY: clean
clean: ## Clean build artifacts
rm -rf bin/ tmp/ coverage.* *.out internal/docs/dist
rm -rf bin/ tmp/ coverage.* *.out internal/api/docs/dist

.PHONY: env-setup
env-setup: ## Create .env file from example
Expand Down
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion cmd/pllm/commands/budget.go → cmd/cli/commands/budget.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import (
"github.com/google/uuid"
"github.com/spf13/cobra"

"github.com/amerfu/pllm/internal/models"
"github.com/amerfu/pllm/internal/core/models"
)

// NewBudgetCommand creates a new budget management command
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion cmd/pllm/commands/key.go → cmd/cli/commands/key.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
"github.com/google/uuid"
"github.com/spf13/cobra"

"github.com/amerfu/pllm/internal/models"
"github.com/amerfu/pllm/internal/core/models"
)

// NewKeyCommand creates a new key management command
Expand Down
2 changes: 1 addition & 1 deletion cmd/pllm/commands/team.go → cmd/cli/commands/team.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
"github.com/google/uuid"
"github.com/spf13/cobra"

"github.com/amerfu/pllm/internal/models"
"github.com/amerfu/pllm/internal/core/models"
)

// NewTeamCommand creates a new team management command
Expand Down
2 changes: 1 addition & 1 deletion cmd/pllm/commands/user.go → cmd/cli/commands/user.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
"github.com/google/uuid"
"github.com/spf13/cobra"

"github.com/amerfu/pllm/internal/models"
"github.com/amerfu/pllm/internal/core/models"
)

// NewUserCommand creates a new user management command
Expand Down
4 changes: 2 additions & 2 deletions cmd/pllm/main.go → cmd/cli/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ import (
"gorm.io/driver/postgres"
"gorm.io/gorm"

"github.com/amerfu/pllm/cmd/pllm/commands"
"github.com/amerfu/pllm/internal/models"
"github.com/amerfu/pllm/cmd/cli/commands"
"github.com/amerfu/pllm/internal/core/models"
)

var (
Expand Down
73 changes: 43 additions & 30 deletions cmd/server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,20 @@ import (
"syscall"
"time"

"github.com/amerfu/pllm/internal/config"
"github.com/amerfu/pllm/internal/database"
"github.com/amerfu/pllm/internal/logger"
"github.com/amerfu/pllm/internal/router"
"github.com/amerfu/pllm/internal/services/cache"
"github.com/amerfu/pllm/internal/services/models"
redisService "github.com/amerfu/pllm/internal/services/redis"
"github.com/amerfu/pllm/internal/core/config"
"github.com/amerfu/pllm/internal/core/database"
"github.com/amerfu/pllm/pkg/logger"
"github.com/amerfu/pllm/internal/api/router"
"github.com/amerfu/pllm/internal/services/data/cache"
"github.com/amerfu/pllm/internal/services/llm/models"
redisService "github.com/amerfu/pllm/internal/services/data/redis"
"github.com/amerfu/pllm/internal/services/worker"
"github.com/joho/godotenv"
"github.com/redis/go-redis/v9"
"go.uber.org/zap"
"gorm.io/gorm"

_ "github.com/amerfu/pllm/internal/handlers/swagger"
// _ "github.com/amerfu/pllm/internal/api/handlers/swagger" // TODO: Generate swagger docs
)

// @title pllm - Blazing Fast LLM Gateway
Expand Down Expand Up @@ -138,8 +138,38 @@ func main() {
// Services are now initialized in the router
// All authentication and management functionality is handled by the unified auth service

// Initialize Redis client early for model manager and worker
var redisClient *redis.Client
if appMode.RedisAvailable {
opt, err := redis.ParseURL(cfg.Redis.URL)
if err != nil {
log.Warn("Failed to parse Redis URL, continuing without Redis", zap.Error(err))
appMode.RedisAvailable = false
} else {
// Override with explicit password and DB if provided
if cfg.Redis.Password != "" {
opt.Password = cfg.Redis.Password
}
if cfg.Redis.DB != 0 {
opt.DB = cfg.Redis.DB
}

redisClient = redis.NewClient(opt)

// Test Redis connection
if err := redisClient.Ping(context.Background()).Err(); err != nil {
log.Warn("Redis not available, continuing without Redis features", zap.Error(err))
redisClient = nil
appMode.RedisAvailable = false
} else {
log.Info("Redis connected successfully")
}
}
}

// Initialize model manager (always needed)
modelManager := models.NewModelManager(log, cfg.Router)
// Pass Redis client for distributed latency tracking (nil if Redis not available)
modelManager := models.NewModelManager(log, cfg.Router, redisClient)
if err := modelManager.LoadModelInstances(cfg.ModelList); err != nil {
log.Fatal("Failed to load model instances", zap.Error(err))
}
Expand All @@ -166,27 +196,10 @@ func main() {
var workerCtx context.Context
var workerCancel context.CancelFunc

if !appMode.IsLiteMode && appMode.RedisAvailable && db != nil {
// Initialize Redis client for worker
opt, err := redis.ParseURL(cfg.Redis.URL)
if err != nil {
log.Fatal("Failed to parse Redis URL", zap.Error(err))
}

// Override with explicit password and DB if provided
if cfg.Redis.Password != "" {
opt.Password = cfg.Redis.Password
}
if cfg.Redis.DB != 0 {
opt.DB = cfg.Redis.DB
}

redisClient := redis.NewClient(opt)

// Test Redis connection for worker
if err := redisClient.Ping(context.Background()).Err(); err != nil {
log.Warn("Redis not available for background worker", zap.Error(err))
} else {
if !appMode.IsLiteMode && appMode.RedisAvailable && db != nil && redisClient != nil {
// Use the Redis client initialized earlier
// Redis connection already verified above
{
// Initialize Redis services for worker
usageQueue := redisService.NewUsageQueue(&redisService.UsageQueueConfig{
Client: redisClient,
Expand Down
4 changes: 2 additions & 2 deletions cmd/worker/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ import (
"gorm.io/driver/postgres"
"gorm.io/gorm"

"github.com/amerfu/pllm/internal/config"
redisService "github.com/amerfu/pllm/internal/services/redis"
"github.com/amerfu/pllm/internal/core/config"
redisService "github.com/amerfu/pllm/internal/services/data/redis"
"github.com/amerfu/pllm/internal/services/worker"
)

Expand Down
30 changes: 16 additions & 14 deletions docs/.vitepress/config.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -16,37 +16,39 @@ export default withMermaid(
],
sidebar: [
{
text: "Getting Started",
text: "Introduction",
items: [
{ text: "What is pllm?", link: "/" },
{ text: "Installation & Setup", link: "/guide/getting-started" },
{ text: "Quick Start Guide", link: "/guide/quickstart" },
{ text: "What is PLLM?", link: "/" },
{ text: "Quick Start", link: "/guide/quickstart" },
{ text: "Installation", link: "/guide/getting-started" },
],
},
{
text: "Core Features",
text: "Configuration",
items: [
{ text: "System Architecture", link: "/guide/architecture" },
{ text: "Multi-Provider Support", link: "/providers" },
{ text: "Configuration Guide", link: "/config" },
{ text: "Model Routing & Load Balancing", link: "/guide/routing" },
{ text: "Provider Setup", link: "/providers" },
{ text: "Authentication", link: "/auth" },
{ text: "Configuration", link: "/config" },
],
},
{
text: "Architecture",
items: [
{ text: "System Overview", link: "/guide/architecture" },
{ text: "Resilience & Reliability", link: "/guide/resilience" },
],
},
{
text: "API Reference",
items: [
{ text: "OpenAI Compatible API", link: "/api" },
{ text: "Chat Completions", link: "/api#chat-completions" },
{ text: "Models", link: "/api#models" },
{ text: "Health Checks", link: "/api#health-checks" },
],
},
{
text: "Deployment",
items: [
{ text: "Docker Deployment", link: "/deployment" },
{ text: "Kubernetes", link: "/deployment#kubernetes" },
{ text: "Production Setup", link: "/deployment#production" },
{ text: "Docker & Kubernetes", link: "/deployment" },
],
},
],
Expand Down
Loading