vllm-project · YuhanLiu11 · Jun 7, 2025 · May 8, 2025 · May 8, 2025 · May 9, 2025
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,55 +1,55 @@
 repos:
-- repo: https://github.com/rhysd/actionlint
-  rev: v1.7.7
-  hooks:
-  - id: actionlint
-- repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v5.0.0
-  hooks:
-  - id: check-json
-  - id: check-toml
-  - id: check-yaml
-    args: [--allow-multiple-documents]
-    exclude: |
-      (?x)(
-          ^helm/templates/|
-          .github/deployment-router.yaml
-      )
-  - id: end-of-file-fixer
-  - id: requirements-txt-fixer
-  - id: trailing-whitespace
-- repo: https://github.com/hadolint/hadolint
-  rev: v2.12.0
-  hooks:
-  - id: hadolint-docker
-    stages: [manual] # Only run in CI, add to .github/workflows/pre-commit.yml
-    exclude: "src/gateway_inference_extension/Dockerfile"
-- repo: https://github.com/gruntwork-io/pre-commit
-  rev: v0.1.25
-  hooks:
-  - id: helmlint
-    stages: [manual] # Only run in CI, add to .github/workflows/pre-commit.yml
-- repo: https://github.com/psf/black
-  rev: '25.1.0'
-  hooks:
-  - id: black
-- repo: https://github.com/pycqa/isort
-  rev: '6.0.0'
-  hooks:
-  - id: isort
-# TODO: Enable this hook when environment issues are resolved
-- repo: https://github.com/koalaman/shellcheck-precommit
-  rev: v0.10.0
-  hooks:
-  - id: shellcheck
-    stages: [manual] # Only run in CI, add to .github/workflows/pre-commit.yml
-- repo: https://github.com/igorshubovych/markdownlint-cli
-  rev: v0.44.0
-  hooks:
-  - id: markdownlint
-    exclude: "(\\.github/PULL_REQUEST_TEMPLATE\\.md|CODE_OF_CONDUCT\\.md)"
-- repo: https://github.com/codespell-project/codespell
-  rev: v2.4.1
-  hooks:
-  - id: codespell
-    args: ["--ignore-words", ".codespell-ignore"]
+  - repo: https://github.com/rhysd/actionlint
+    rev: v1.7.7
+    hooks:
+      - id: actionlint
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v5.0.0
+    hooks:
+      - id: check-json
+      - id: check-toml
+      - id: check-yaml
+        args: ["--allow-multiple-documents"]
+        exclude: |
+          (?x)(
+              ^helm/templates/|
+              .github/deployment-router.yaml
+          )
+      - id: end-of-file-fixer
+      - id: requirements-txt-fixer
+      - id: trailing-whitespace
+  - repo: https://github.com/hadolint/hadolint
+    rev: v2.12.0
+    hooks:
+      - id: hadolint-docker
+        stages: [manual] # Only run in CI, add to .github/workflows/pre-commit.yml
+        exclude: "src/gateway_inference_extension/Dockerfile"
+  - repo: https://github.com/gruntwork-io/pre-commit
+    rev: v0.1.25
+    hooks:
+      - id: helmlint
+        stages: [manual] # Only run in CI, add to .github/workflows/pre-commit.yml
+  - repo: https://github.com/psf/black
+    rev: "25.1.0"
+    hooks:
+      - id: black
+  - repo: https://github.com/pycqa/isort
+    rev: "6.0.0"
+    hooks:
+      - id: isort
+  # TODO: Enable this hook when environment issues are resolved
+  - repo: https://github.com/koalaman/shellcheck-precommit
+    rev: v0.10.0
+    hooks:
+      - id: shellcheck
+        stages: [manual] # Only run in CI, add to .github/workflows/pre-commit.yml
+  - repo: https://github.com/igorshubovych/markdownlint-cli
+    rev: v0.44.0
+    hooks:
+      - id: markdownlint
+        exclude: "(\\.github/PULL_REQUEST_TEMPLATE\\.md|CODE_OF_CONDUCT\\.md)"
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.4.1
+    hooks:
+      - id: codespell
+        args: ["--ignore-words", ".codespell-ignore"]
diff --git a/operator/PROJECT b/operator/PROJECT
@@ -4,44 +4,44 @@
 # More info: https://book.kubebuilder.io/reference/project-config.html
 domain: vllm.ai
 layout:
-- go.kubebuilder.io/v4
+  - go.kubebuilder.io/v4
 projectName: production-stack
 repo: production-stack
 resources:
-- api:
-    crdVersion: v1
-    namespaced: true
-  controller: true
-  domain: vllm.ai
-  group: production stack
-  kind: VLLMRuntime
-  path: production-stack/api/v1alpha1
-  version: v1alpha1
-- api:
-    crdVersion: v1
-    namespaced: true
-  controller: true
-  domain: vllm.ai
-  group: production-stack
-  kind: VLLMRouter
-  path: production-stack/api/v1alpha1
-  version: v1alpha1
-- api:
-    crdVersion: v1
-    namespaced: true
-  controller: true
-  domain: vllm.ai
-  group: production-stack
-  kind: CacheServer
-  path: production-stack/api/v1alpha1
-  version: v1alpha1
-- api:
-    crdVersion: v1
-    namespaced: true
-  controller: true
-  domain: vllm.ai
-  group: production-stack
-  kind: LoraAdapter
-  path: production-stack/api/v1alpha1
-  version: v1alpha1
+  - api:
+      crdVersion: v1
+      namespaced: true
+    controller: true
+    domain: vllm.ai
+    group: production-stack
+    kind: VLLMRuntime
+    path: production-stack/api/v1alpha1
+    version: v1alpha1
+  - api:
+      crdVersion: v1
+      namespaced: true
+    controller: true
+    domain: vllm.ai
+    group: production-stack
+    kind: VLLMRouter
+    path: production-stack/api/v1alpha1
+    version: v1alpha1
+  - api:
+      crdVersion: v1
+      namespaced: true
+    controller: true
+    domain: vllm.ai
+    group: production-stack
+    kind: CacheServer
+    path: production-stack/api/v1alpha1
+    version: v1alpha1
+  - api:
+      crdVersion: v1
+      namespaced: true
+    controller: true
+    domain: vllm.ai
+    group: production-stack
+    kind: LoraAdapter
+    path: production-stack/api/v1alpha1
+    version: v1alpha1
 version: "3"
diff --git a/operator/api/v1alpha1/loraadapter_types.go b/operator/api/v1alpha1/loraadapter_types.go
@@ -32,7 +32,7 @@ type LoraAdapterSpec struct {
 	// +kubebuilder:validation:Required
 	BaseModel string `json:"baseModel"`
 	// DeploymentConfig defines how the adapter should be deployed
-	DeploymentConfig DeploymentConfig `json:"deploymentConfig,omitempty"`
+	LoraAdapterDeploymentConfig LoraAdapterDeploymentConfig `json:"loraAdapterDeploymentConfig,omitempty"`
 	// VLLMApiKey defines the configuration for vLLM API key authentication
 	VLLMApiKey *VLLMApiKeyConfig `json:"vllmApiKey,omitempty"`
 }
@@ -63,7 +63,7 @@ type SecretRef struct {
 	Name string `json:"name,omitempty"`
 }
 
-type DeploymentConfig struct {
+type LoraAdapterDeploymentConfig struct {
 	// Algorithm specifies which placement algorithm to use.
 	// +kubebuilder:validation:Required
 	// +kubebuilder:validation:Enum=default;ordered;equalized

diff --git a/operator/api/v1alpha1/vllmruntime_types.go b/operator/api/v1alpha1/vllmruntime_types.go
@@ -24,11 +24,41 @@ import (
 // EDIT THIS FILE!  THIS IS SCAFFOLDING FOR YOU TO OWN!
 // NOTE: json tags are required.  Any new fields you add must have json tags for the fields to be serialized.
 
+// DeploymentConfig groups the replica count, deploy strategy, resource requirements, and image settings.
+type DeploymentConfig struct {
+	// Replicas is the replica count; the marker below defaults it to 1.
+	// +kubebuilder:default=1
+	Replicas int32 `json:"replicas,omitempty"`
+
+	// DeployStrategy is the deploy strategy (RollingUpdate or Recreate per the markers below); serialized as "deploymentStrategy".
+	// +kubebuilder:validation:Enum=RollingUpdate;Recreate
+	// +kubebuilder:default=RollingUpdate
+	DeployStrategy string `json:"deploymentStrategy,omitempty"`
+
+	// Resources holds the resource requirements; its JSON tag has no omitempty.
+	Resources ResourceRequirements `json:"resources"`
+
+	// Image holds the image configuration; its JSON tag has no omitempty.
+	Image ImageSpec `json:"image"`
+}
+
 // VLLMRuntimeSpec defines the desired state of VLLMRuntime
 type VLLMRuntimeSpec struct {
 	// Model configuration
 	Model ModelSpec `json:"model"`
 
+	// VLLMConfig holds the vLLM server configuration.
+	VLLMConfig VLLMConfig `json:"vllmConfig"`
+
+	// LMCacheConfig holds the LM Cache configuration (tagged omitempty).
+	LMCacheConfig LMCacheConfig `json:"lmCacheConfig,omitempty"`
+
+	// DeploymentConfig holds the deployment configuration.
+	DeploymentConfig DeploymentConfig `json:"deploymentConfig"`
+}
+
+// VLLMConfig defines the vLLM server configuration
+type VLLMConfig struct {
 	// Enable chunked prefill
 	EnableChunkedPrefill bool `json:"enableChunkedPrefill,omitempty"`
 
@@ -44,9 +74,6 @@ type VLLMRuntimeSpec struct {
 	// Maximum number of LoRAs
 	MaxLoras int32 `json:"maxLoras,omitempty"`
 
-	// LM Cache configuration
-	LMCacheConfig LMCacheConfig `json:"lmCacheConfig,omitempty"`
-
 	// Extra arguments for vllm serve
 	ExtraArgs []string `json:"extraArgs,omitempty"`
 
@@ -59,34 +86,19 @@ type VLLMRuntimeSpec struct {
 
 	// Environment variables
 	Env []EnvVar `json:"env,omitempty"`
+}
 
-	// Resource requirements
-	Resources ResourceRequirements `json:"resources"`
-
-	// Image configuration
-	Image ImageSpec `json:"image"`
+// ModelSpec defines the model configuration
+type ModelSpec struct {
+	// Model URL
+	ModelURL string `json:"modelURL"`
 
 	// HuggingFace token secret
 	HFTokenSecret corev1.LocalObjectReference `json:"hfTokenSecret,omitempty"`
 	// +kubebuilder:default=token
 	// +kubebuilder:validation:RequiredWhen=HFTokenSecret.Name!=""
 	HFTokenName string `json:"hfTokenName,omitempty"`
 
-	// Replicas
-	// +kubebuilder:default=1
-	Replicas int32 `json:"replicas,omitempty"`
-
-	// Deploy strategy
-	// +kubebuilder:validation:Enum=RollingUpdate;Recreate
-	// +kubebuilder:default=RollingUpdate
-	DeployStrategy string `json:"deploymentStrategy,omitempty"`
-}
-
-// ModelSpec defines the model configuration
-type ModelSpec struct {
-	// Model URL
-	ModelURL string `json:"modelURL"`
-
 	// Enable LoRA
 	EnableLoRA bool `json:"enableLoRA,omitempty"`