Skip to content

Commit ff533f8

Browse files
authored
[Misc] Refactor the structure of VLLMRuntime CRD (#479)
* add CRD support for production stack Signed-off-by: royyhuang <[email protected]> * move operator to a secondary dir instead of in root dir Signed-off-by: royyhuang <[email protected]> * rename api group from serving.vllm.ai to production-stack.vllm.ai Signed-off-by: royyhuang <[email protected]> * enable lmcache cpu offloading Signed-off-by: royyhuang <[email protected]> * enable lmcache remote cache server offloading Signed-off-by: royyhuang <[email protected]> * fix service discovery issue by adding readiness probe to vllm pod Signed-off-by: royyhuang <[email protected]> * fix readiness probe Signed-off-by: royyhuang <[email protected]> * restructure runtime crd spec Signed-off-by: royyhuang <[email protected]> * add default operator manifest Signed-off-by: royyhuang <[email protected]> * move rbac for vllm-router pod to controller Signed-off-by: royyhuang <[email protected]> * use service account name defined in vllm-router cr to create service account instead of hardcoding Signed-off-by: royyhuang <[email protected]> * update sample loraadapter manifest Signed-off-by: royyhuang <[email protected]> * update default controller image Signed-off-by: royyhuang <[email protected]> * remove old image name patch Signed-off-by: royyhuang <[email protected]> --------- Signed-off-by: royyhuang <[email protected]>
1 parent 6e3c06f commit ff533f8

26 files changed

+2329
-688
lines changed

.pre-commit-config.yaml

Lines changed: 54 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,55 +1,55 @@
11
repos:
2-
- repo: https://github.com/rhysd/actionlint
3-
rev: v1.7.7
4-
hooks:
5-
- id: actionlint
6-
- repo: https://github.com/pre-commit/pre-commit-hooks
7-
rev: v5.0.0
8-
hooks:
9-
- id: check-json
10-
- id: check-toml
11-
- id: check-yaml
12-
args: [--allow-multiple-documents]
13-
exclude: |
14-
(?x)(
15-
^helm/templates/|
16-
.github/deployment-router.yaml
17-
)
18-
- id: end-of-file-fixer
19-
- id: requirements-txt-fixer
20-
- id: trailing-whitespace
21-
- repo: https://github.com/hadolint/hadolint
22-
rev: v2.12.0
23-
hooks:
24-
- id: hadolint-docker
25-
stages: [manual] # Only run in CI, add to .github/workflows/pre-commit.yml
26-
exclude: "src/gateway_inference_extension/Dockerfile"
27-
- repo: https://github.com/gruntwork-io/pre-commit
28-
rev: v0.1.25
29-
hooks:
30-
- id: helmlint
31-
stages: [manual] # Only run in CI, add to .github/workflows/pre-commit.yml
32-
- repo: https://github.com/psf/black
33-
rev: '25.1.0'
34-
hooks:
35-
- id: black
36-
- repo: https://github.com/pycqa/isort
37-
rev: '6.0.0'
38-
hooks:
39-
- id: isort
40-
# TODO: Enable this hook when environment issues are resolved
41-
- repo: https://github.com/koalaman/shellcheck-precommit
42-
rev: v0.10.0
43-
hooks:
44-
- id: shellcheck
45-
stages: [manual] # Only run in CI, add to .github/workflows/pre-commit.yml
46-
- repo: https://github.com/igorshubovych/markdownlint-cli
47-
rev: v0.44.0
48-
hooks:
49-
- id: markdownlint
50-
exclude: "(\\.github/PULL_REQUEST_TEMPLATE\\.md|CODE_OF_CONDUCT\\.md)"
51-
- repo: https://github.com/codespell-project/codespell
52-
rev: v2.4.1
53-
hooks:
54-
- id: codespell
55-
args: ["--ignore-words", ".codespell-ignore"]
2+
- repo: https://github.com/rhysd/actionlint
3+
rev: v1.7.7
4+
hooks:
5+
- id: actionlint
6+
- repo: https://github.com/pre-commit/pre-commit-hooks
7+
rev: v5.0.0
8+
hooks:
9+
- id: check-json
10+
- id: check-toml
11+
- id: check-yaml
12+
args: ["--allow-multiple-documents"]
13+
exclude: |
14+
(?x)(
15+
^helm/templates/|
16+
.github/deployment-router.yaml
17+
)
18+
- id: end-of-file-fixer
19+
- id: requirements-txt-fixer
20+
- id: trailing-whitespace
21+
- repo: https://github.com/hadolint/hadolint
22+
rev: v2.12.0
23+
hooks:
24+
- id: hadolint-docker
25+
stages: [manual] # Only run in CI, add to .github/workflows/pre-commit.yml
26+
exclude: "src/gateway_inference_extension/Dockerfile"
27+
- repo: https://github.com/gruntwork-io/pre-commit
28+
rev: v0.1.25
29+
hooks:
30+
- id: helmlint
31+
stages: [manual] # Only run in CI, add to .github/workflows/pre-commit.yml
32+
- repo: https://github.com/psf/black
33+
rev: "25.1.0"
34+
hooks:
35+
- id: black
36+
- repo: https://github.com/pycqa/isort
37+
rev: "6.0.0"
38+
hooks:
39+
- id: isort
40+
# TODO: Enable this hook when environment issues are resolved
41+
- repo: https://github.com/koalaman/shellcheck-precommit
42+
rev: v0.10.0
43+
hooks:
44+
- id: shellcheck
45+
stages: [manual] # Only run in CI, add to .github/workflows/pre-commit.yml
46+
- repo: https://github.com/igorshubovych/markdownlint-cli
47+
rev: v0.44.0
48+
hooks:
49+
- id: markdownlint
50+
exclude: "(\\.github/PULL_REQUEST_TEMPLATE\\.md|CODE_OF_CONDUCT\\.md)"
51+
- repo: https://github.com/codespell-project/codespell
52+
rev: v2.4.1
53+
hooks:
54+
- id: codespell
55+
args: ["--ignore-words", ".codespell-ignore"]

operator/PROJECT

Lines changed: 37 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -4,44 +4,44 @@
44
# More info: https://book.kubebuilder.io/reference/project-config.html
55
domain: vllm.ai
66
layout:
7-
- go.kubebuilder.io/v4
7+
- go.kubebuilder.io/v4
88
projectName: production-stack
99
repo: production-stack
1010
resources:
11-
- api:
12-
crdVersion: v1
13-
namespaced: true
14-
controller: true
15-
domain: vllm.ai
16-
group: production stack
17-
kind: VLLMRuntime
18-
path: production-stack/api/v1alpha1
19-
version: v1alpha1
20-
- api:
21-
crdVersion: v1
22-
namespaced: true
23-
controller: true
24-
domain: vllm.ai
25-
group: production-stack
26-
kind: VLLMRouter
27-
path: production-stack/api/v1alpha1
28-
version: v1alpha1
29-
- api:
30-
crdVersion: v1
31-
namespaced: true
32-
controller: true
33-
domain: vllm.ai
34-
group: production-stack
35-
kind: CacheServer
36-
path: production-stack/api/v1alpha1
37-
version: v1alpha1
38-
- api:
39-
crdVersion: v1
40-
namespaced: true
41-
controller: true
42-
domain: vllm.ai
43-
group: production-stack
44-
kind: LoraAdapter
45-
path: production-stack/api/v1alpha1
46-
version: v1alpha1
11+
- api:
12+
crdVersion: v1
13+
namespaced: true
14+
controller: true
15+
domain: vllm.ai
16+
group: production stack
17+
kind: VLLMRuntime
18+
path: production-stack/api/v1alpha1
19+
version: v1alpha1
20+
- api:
21+
crdVersion: v1
22+
namespaced: true
23+
controller: true
24+
domain: vllm.ai
25+
group: production-stack
26+
kind: VLLMRouter
27+
path: production-stack/api/v1alpha1
28+
version: v1alpha1
29+
- api:
30+
crdVersion: v1
31+
namespaced: true
32+
controller: true
33+
domain: vllm.ai
34+
group: production-stack
35+
kind: CacheServer
36+
path: production-stack/api/v1alpha1
37+
version: v1alpha1
38+
- api:
39+
crdVersion: v1
40+
namespaced: true
41+
controller: true
42+
domain: vllm.ai
43+
group: production-stack
44+
kind: LoraAdapter
45+
path: production-stack/api/v1alpha1
46+
version: v1alpha1
4747
version: "3"

operator/api/v1alpha1/loraadapter_types.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ type LoraAdapterSpec struct {
3232
// +kubebuilder:validation:Required
3333
BaseModel string `json:"baseModel"`
3434
// DeploymentConfig defines how the adapter should be deployed
35-
DeploymentConfig DeploymentConfig `json:"deploymentConfig,omitempty"`
35+
LoraAdapterDeploymentConfig LoraAdapterDeploymentConfig `json:"loraAdapterDeploymentConfig,omitempty"`
3636
// VLLMApiKey defines the configuration for vLLM API key authentication
3737
VLLMApiKey *VLLMApiKeyConfig `json:"vllmApiKey,omitempty"`
3838
}
@@ -63,7 +63,7 @@ type SecretRef struct {
6363
Name string `json:"name,omitempty"`
6464
}
6565

66-
type DeploymentConfig struct {
66+
type LoraAdapterDeploymentConfig struct {
6767
// Algorithm specifies which placement algorithm to use.
6868
// +kubebuilder:validation:Required
6969
// +kubebuilder:validation:Enum=default;ordered;equalized

operator/api/v1alpha1/vllmruntime_types.go

Lines changed: 35 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,41 @@ import (
2424
// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
2525
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.
2626

27+
// DeploymentConfig defines the deployment configuration
28+
type DeploymentConfig struct {
29+
// Replicas
30+
// +kubebuilder:default=1
31+
Replicas int32 `json:"replicas,omitempty"`
32+
33+
// Deploy strategy
34+
// +kubebuilder:validation:Enum=RollingUpdate;Recreate
35+
// +kubebuilder:default=RollingUpdate
36+
DeployStrategy string `json:"deploymentStrategy,omitempty"`
37+
38+
// Resource requirements
39+
Resources ResourceRequirements `json:"resources"`
40+
41+
// Image configuration
42+
Image ImageSpec `json:"image"`
43+
}
44+
2745
// VLLMRuntimeSpec defines the desired state of VLLMRuntime
2846
type VLLMRuntimeSpec struct {
2947
// Model configuration
3048
Model ModelSpec `json:"model"`
3149

50+
// vLLM server configuration
51+
VLLMConfig VLLMConfig `json:"vllmConfig"`
52+
53+
// LM Cache configuration
54+
LMCacheConfig LMCacheConfig `json:"lmCacheConfig,omitempty"`
55+
56+
// Deployment configuration
57+
DeploymentConfig DeploymentConfig `json:"deploymentConfig"`
58+
}
59+
60+
// VLLMConfig defines the vLLM server configuration
61+
type VLLMConfig struct {
3262
// Enable chunked prefill
3363
EnableChunkedPrefill bool `json:"enableChunkedPrefill,omitempty"`
3464

@@ -44,9 +74,6 @@ type VLLMRuntimeSpec struct {
4474
// Maximum number of LoRAs
4575
MaxLoras int32 `json:"maxLoras,omitempty"`
4676

47-
// LM Cache configuration
48-
LMCacheConfig LMCacheConfig `json:"lmCacheConfig,omitempty"`
49-
5077
// Extra arguments for vllm serve
5178
ExtraArgs []string `json:"extraArgs,omitempty"`
5279

@@ -59,34 +86,19 @@ type VLLMRuntimeSpec struct {
5986

6087
// Environment variables
6188
Env []EnvVar `json:"env,omitempty"`
89+
}
6290

63-
// Resource requirements
64-
Resources ResourceRequirements `json:"resources"`
65-
66-
// Image configuration
67-
Image ImageSpec `json:"image"`
91+
// ModelSpec defines the model configuration
92+
type ModelSpec struct {
93+
// Model URL
94+
ModelURL string `json:"modelURL"`
6895

6996
// HuggingFace token secret
7097
HFTokenSecret corev1.LocalObjectReference `json:"hfTokenSecret,omitempty"`
7198
// +kubebuilder:default=token
7299
// +kubebuilder:validation:RequiredWhen=HFTokenSecret.Name!=""
73100
HFTokenName string `json:"hfTokenName,omitempty"`
74101

75-
// Replicas
76-
// +kubebuilder:default=1
77-
Replicas int32 `json:"replicas,omitempty"`
78-
79-
// Deploy strategy
80-
// +kubebuilder:validation:Enum=RollingUpdate;Recreate
81-
// +kubebuilder:default=RollingUpdate
82-
DeployStrategy string `json:"deploymentStrategy,omitempty"`
83-
}
84-
85-
// ModelSpec defines the model configuration
86-
type ModelSpec struct {
87-
// Model URL
88-
ModelURL string `json:"modelURL"`
89-
90102
// Enable LoRA
91103
EnableLoRA bool `json:"enableLoRA,omitempty"`
92104

0 commit comments

Comments
 (0)