Skip to content

[Misc] Refactor the structure of VLLMRuntime CRD #479

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Jun 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 54 additions & 54 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,55 +1,55 @@
repos:
- repo: https://github.com/rhysd/actionlint
rev: v1.7.7
hooks:
- id: actionlint
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
- id: check-json
- id: check-toml
- id: check-yaml
args: [--allow-multiple-documents]
exclude: |
(?x)(
^helm/templates/|
.github/deployment-router.yaml
)
- id: end-of-file-fixer
- id: requirements-txt-fixer
- id: trailing-whitespace
- repo: https://github.com/hadolint/hadolint
rev: v2.12.0
hooks:
- id: hadolint-docker
stages: [manual] # Only run in CI, add to .github/workflows/pre-commit.yml
exclude: "src/gateway_inference_extension/Dockerfile"
- repo: https://github.com/gruntwork-io/pre-commit
rev: v0.1.25
hooks:
- id: helmlint
stages: [manual] # Only run in CI, add to .github/workflows/pre-commit.yml
- repo: https://github.com/psf/black
rev: '25.1.0'
hooks:
- id: black
- repo: https://github.com/pycqa/isort
rev: '6.0.0'
hooks:
- id: isort
# TODO: Enable this hook when environment issues are resolved
- repo: https://github.com/koalaman/shellcheck-precommit
rev: v0.10.0
hooks:
- id: shellcheck
stages: [manual] # Only run in CI, add to .github/workflows/pre-commit.yml
- repo: https://github.com/igorshubovych/markdownlint-cli
rev: v0.44.0
hooks:
- id: markdownlint
exclude: "(\\.github/PULL_REQUEST_TEMPLATE\\.md|CODE_OF_CONDUCT\\.md)"
- repo: https://github.com/codespell-project/codespell
rev: v2.4.1
hooks:
- id: codespell
args: ["--ignore-words", ".codespell-ignore"]
- repo: https://github.com/rhysd/actionlint
rev: v1.7.7
hooks:
- id: actionlint
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
- id: check-json
- id: check-toml
- id: check-yaml
args: ["--allow-multiple-documents"]
exclude: |
(?x)(
^helm/templates/|
.github/deployment-router.yaml
)
- id: end-of-file-fixer
- id: requirements-txt-fixer
- id: trailing-whitespace
- repo: https://github.com/hadolint/hadolint
rev: v2.12.0
hooks:
- id: hadolint-docker
stages: [manual] # Only run in CI, add to .github/workflows/pre-commit.yml
exclude: "src/gateway_inference_extension/Dockerfile"
- repo: https://github.com/gruntwork-io/pre-commit
rev: v0.1.25
hooks:
- id: helmlint
stages: [manual] # Only run in CI, add to .github/workflows/pre-commit.yml
- repo: https://github.com/psf/black
rev: "25.1.0"
hooks:
- id: black
- repo: https://github.com/pycqa/isort
rev: "6.0.0"
hooks:
- id: isort
# TODO: Enable this hook when environment issues are resolved
- repo: https://github.com/koalaman/shellcheck-precommit
rev: v0.10.0
hooks:
- id: shellcheck
stages: [manual] # Only run in CI, add to .github/workflows/pre-commit.yml
- repo: https://github.com/igorshubovych/markdownlint-cli
rev: v0.44.0
hooks:
- id: markdownlint
exclude: "(\\.github/PULL_REQUEST_TEMPLATE\\.md|CODE_OF_CONDUCT\\.md)"
- repo: https://github.com/codespell-project/codespell
rev: v2.4.1
hooks:
- id: codespell
args: ["--ignore-words", ".codespell-ignore"]
74 changes: 37 additions & 37 deletions operator/PROJECT
Original file line number Diff line number Diff line change
Expand Up @@ -4,44 +4,44 @@
# More info: https://book.kubebuilder.io/reference/project-config.html
domain: vllm.ai
layout:
- go.kubebuilder.io/v4
- go.kubebuilder.io/v4
projectName: production-stack
repo: production-stack
resources:
- api:
crdVersion: v1
namespaced: true
controller: true
domain: vllm.ai
group: production stack
kind: VLLMRuntime
path: production-stack/api/v1alpha1
version: v1alpha1
- api:
crdVersion: v1
namespaced: true
controller: true
domain: vllm.ai
group: production-stack
kind: VLLMRouter
path: production-stack/api/v1alpha1
version: v1alpha1
- api:
crdVersion: v1
namespaced: true
controller: true
domain: vllm.ai
group: production-stack
kind: CacheServer
path: production-stack/api/v1alpha1
version: v1alpha1
- api:
crdVersion: v1
namespaced: true
controller: true
domain: vllm.ai
group: production-stack
kind: LoraAdapter
path: production-stack/api/v1alpha1
version: v1alpha1
- api:
crdVersion: v1
namespaced: true
controller: true
domain: vllm.ai
group: production stack
kind: VLLMRuntime
path: production-stack/api/v1alpha1
version: v1alpha1
- api:
crdVersion: v1
namespaced: true
controller: true
domain: vllm.ai
group: production-stack
kind: VLLMRouter
path: production-stack/api/v1alpha1
version: v1alpha1
- api:
crdVersion: v1
namespaced: true
controller: true
domain: vllm.ai
group: production-stack
kind: CacheServer
path: production-stack/api/v1alpha1
version: v1alpha1
- api:
crdVersion: v1
namespaced: true
controller: true
domain: vllm.ai
group: production-stack
kind: LoraAdapter
path: production-stack/api/v1alpha1
version: v1alpha1
version: "3"
4 changes: 2 additions & 2 deletions operator/api/v1alpha1/loraadapter_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ type LoraAdapterSpec struct {
// +kubebuilder:validation:Required
BaseModel string `json:"baseModel"`
// DeploymentConfig defines how the adapter should be deployed
DeploymentConfig DeploymentConfig `json:"deploymentConfig,omitempty"`
LoraAdapterDeploymentConfig LoraAdapterDeploymentConfig `json:"loraAdapterDeploymentConfig,omitempty"`
// VLLMApiKey defines the configuration for vLLM API key authentication
VLLMApiKey *VLLMApiKeyConfig `json:"vllmApiKey,omitempty"`
}
Expand Down Expand Up @@ -63,7 +63,7 @@ type SecretRef struct {
Name string `json:"name,omitempty"`
}

type DeploymentConfig struct {
type LoraAdapterDeploymentConfig struct {
// Algorithm specifies which placement algorithm to use.
// +kubebuilder:validation:Required
// +kubebuilder:validation:Enum=default;ordered;equalized
Expand Down
58 changes: 35 additions & 23 deletions operator/api/v1alpha1/vllmruntime_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,41 @@ import (
// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.

// DeploymentConfig defines the deployment configuration: replicas, deploy
// strategy, resource requirements, and image.
type DeploymentConfig struct {
// Replicas is the number of replicas. The marker below gives it a schema
// default of 1.
// +kubebuilder:default=1
Replicas int32 `json:"replicas,omitempty"`

// DeployStrategy is the deploy strategy, serialized as "deploymentStrategy".
// The markers below restrict it to RollingUpdate or Recreate and default it
// to RollingUpdate.
// +kubebuilder:validation:Enum=RollingUpdate;Recreate
// +kubebuilder:default=RollingUpdate
DeployStrategy string `json:"deploymentStrategy,omitempty"`

// Resources holds the resource requirements. The tag has no omitempty, so
// the "resources" key is always present in serialized output.
Resources ResourceRequirements `json:"resources"`

// Image holds the image configuration. As with Resources, the tag has no
// omitempty.
Image ImageSpec `json:"image"`
}

// VLLMRuntimeSpec defines the desired state of VLLMRuntime. It groups the
// settings into model, vLLM server, LM Cache, and deployment sections.
type VLLMRuntimeSpec struct {
// Model is the model configuration (serialized as "model").
Model ModelSpec `json:"model"`

// VLLMConfig is the vLLM server configuration (serialized as "vllmConfig").
VLLMConfig VLLMConfig `json:"vllmConfig"`

// LMCacheConfig is the LM Cache configuration. It is the only field in this
// struct tagged omitempty.
LMCacheConfig LMCacheConfig `json:"lmCacheConfig,omitempty"`

// DeploymentConfig is the deployment configuration (serialized as
// "deploymentConfig").
DeploymentConfig DeploymentConfig `json:"deploymentConfig"`
}

// VLLMConfig defines the vLLM server configuration
type VLLMConfig struct {
// Enable chunked prefill
EnableChunkedPrefill bool `json:"enableChunkedPrefill,omitempty"`

Expand All @@ -44,9 +74,6 @@ type VLLMRuntimeSpec struct {
// Maximum number of LoRAs
MaxLoras int32 `json:"maxLoras,omitempty"`

// LM Cache configuration
LMCacheConfig LMCacheConfig `json:"lmCacheConfig,omitempty"`

// Extra arguments for vllm serve
ExtraArgs []string `json:"extraArgs,omitempty"`

Expand All @@ -59,34 +86,19 @@ type VLLMRuntimeSpec struct {

// Environment variables
Env []EnvVar `json:"env,omitempty"`
}

// Resource requirements
Resources ResourceRequirements `json:"resources"`

// Image configuration
Image ImageSpec `json:"image"`
// ModelSpec defines the model configuration
type ModelSpec struct {
// Model URL
ModelURL string `json:"modelURL"`

// HuggingFace token secret
HFTokenSecret corev1.LocalObjectReference `json:"hfTokenSecret,omitempty"`
// +kubebuilder:default=token
// +kubebuilder:validation:RequiredWhen=HFTokenSecret.Name!=""
HFTokenName string `json:"hfTokenName,omitempty"`

// Replicas
// +kubebuilder:default=1
Replicas int32 `json:"replicas,omitempty"`

// Deploy strategy
// +kubebuilder:validation:Enum=RollingUpdate;Recreate
// +kubebuilder:default=RollingUpdate
DeployStrategy string `json:"deploymentStrategy,omitempty"`
}

// ModelSpec defines the model configuration
type ModelSpec struct {
// Model URL
ModelURL string `json:"modelURL"`

// Enable LoRA
EnableLoRA bool `json:"enableLoRA,omitempty"`

Expand Down
Loading
Loading