From 2a6cf602350b13891490dc8597d9d3b1726d4819 Mon Sep 17 00:00:00 2001 From: royyhuang Date: Thu, 8 May 2025 05:10:33 +0000 Subject: [PATCH 01/14] add CRD support for production stack Signed-off-by: royyhuang --- .dockerignore | 3 + .gitignore | 31 + Dockerfile | 33 ++ Makefile | 225 ++++++++ PROJECT | 29 + api/v1alpha1/groupversion_info.go | 36 ++ api/v1alpha1/vllmrouter_types.go | 128 +++++ api/v1alpha1/vllmruntime_types.go | 184 ++++++ api/v1alpha1/zz_generated.deepcopy.go | 316 +++++++++++ cmd/main.go | 252 +++++++++ ...production-stack.vllm.ai_staticroutes.yaml | 218 +++++++ .../bases/serving.vllm.ai_vllmrouters.yaml | 251 ++++++++ .../bases/serving.vllm.ai_vllmruntimes.yaml | 216 +++++++ config/crd/kustomization.yaml | 17 + config/crd/kustomizeconfig.yaml | 19 + .../default/cert_metrics_manager_patch.yaml | 30 + config/default/kustomization.yaml | 234 ++++++++ config/default/manager_metrics_patch.yaml | 4 + config/default/metrics_service.yaml | 18 + config/manager/deployment.yaml | 92 +++ config/manager/kustomization.yaml | 9 + config/manager/namespace.yaml | 8 + .../network-policy/allow-metrics-traffic.yaml | 27 + config/network-policy/kustomization.yaml | 2 + config/prometheus/kustomization.yaml | 11 + config/prometheus/monitor.yaml | 27 + config/prometheus/monitor_tls_patch.yaml | 19 + config/rbac/kustomization.yaml | 30 + config/rbac/leader_election_role.yaml | 40 ++ config/rbac/leader_election_role_binding.yaml | 15 + config/rbac/metrics_auth_role.yaml | 17 + config/rbac/metrics_auth_role_binding.yaml | 12 + config/rbac/metrics_reader_role.yaml | 9 + config/rbac/pod_viewer_role.yaml | 17 + config/rbac/role.yaml | 95 ++++ config/rbac/role_binding.yaml | 15 + config/rbac/service_account.yaml | 8 + config/rbac/vllmrouter_admin_role.yaml | 27 + config/rbac/vllmrouter_editor_role.yaml | 33 ++ config/rbac/vllmrouter_role_binding.yaml | 16 + config/rbac/vllmrouter_service_account.yaml | 8 + config/rbac/vllmrouter_viewer_role.yaml | 29 + 
config/rbac/vllmruntime_admin_role.yaml | 27 + config/rbac/vllmruntime_editor_role.yaml | 33 ++ config/rbac/vllmruntime_viewer_role.yaml | 29 + config/samples/kustomization.yaml | 8 + config/samples/serving_v1alpha1_router.yaml | 57 ++ .../samples/serving_v1alpha1_vllmruntime.yaml | 63 +++ go.mod | 100 ++++ go.sum | 254 +++++++++ hack/boilerplate.go.txt | 15 + internal/controller/suite_test.go | 88 +++ internal/controller/vllmrouter_controller.go | 383 +++++++++++++ .../controller/vllmrouter_controller_test.go | 84 +++ internal/controller/vllmruntime_controller.go | 534 ++++++++++++++++++ .../controller/vllmruntime_controller_test.go | 84 +++ 56 files changed, 4569 insertions(+) create mode 100644 .dockerignore create mode 100644 Dockerfile create mode 100644 Makefile create mode 100644 PROJECT create mode 100644 api/v1alpha1/groupversion_info.go create mode 100644 api/v1alpha1/vllmrouter_types.go create mode 100644 api/v1alpha1/vllmruntime_types.go create mode 100644 api/v1alpha1/zz_generated.deepcopy.go create mode 100644 cmd/main.go create mode 100644 config/crd/bases/production-stack.vllm.ai_staticroutes.yaml create mode 100644 config/crd/bases/serving.vllm.ai_vllmrouters.yaml create mode 100644 config/crd/bases/serving.vllm.ai_vllmruntimes.yaml create mode 100644 config/crd/kustomization.yaml create mode 100644 config/crd/kustomizeconfig.yaml create mode 100644 config/default/cert_metrics_manager_patch.yaml create mode 100644 config/default/kustomization.yaml create mode 100644 config/default/manager_metrics_patch.yaml create mode 100644 config/default/metrics_service.yaml create mode 100644 config/manager/deployment.yaml create mode 100644 config/manager/kustomization.yaml create mode 100644 config/manager/namespace.yaml create mode 100644 config/network-policy/allow-metrics-traffic.yaml create mode 100644 config/network-policy/kustomization.yaml create mode 100644 config/prometheus/kustomization.yaml create mode 100644 config/prometheus/monitor.yaml create 
mode 100644 config/prometheus/monitor_tls_patch.yaml create mode 100644 config/rbac/kustomization.yaml create mode 100644 config/rbac/leader_election_role.yaml create mode 100644 config/rbac/leader_election_role_binding.yaml create mode 100644 config/rbac/metrics_auth_role.yaml create mode 100644 config/rbac/metrics_auth_role_binding.yaml create mode 100644 config/rbac/metrics_reader_role.yaml create mode 100644 config/rbac/pod_viewer_role.yaml create mode 100644 config/rbac/role.yaml create mode 100644 config/rbac/role_binding.yaml create mode 100644 config/rbac/service_account.yaml create mode 100644 config/rbac/vllmrouter_admin_role.yaml create mode 100644 config/rbac/vllmrouter_editor_role.yaml create mode 100644 config/rbac/vllmrouter_role_binding.yaml create mode 100644 config/rbac/vllmrouter_service_account.yaml create mode 100644 config/rbac/vllmrouter_viewer_role.yaml create mode 100644 config/rbac/vllmruntime_admin_role.yaml create mode 100644 config/rbac/vllmruntime_editor_role.yaml create mode 100644 config/rbac/vllmruntime_viewer_role.yaml create mode 100644 config/samples/kustomization.yaml create mode 100644 config/samples/serving_v1alpha1_router.yaml create mode 100644 config/samples/serving_v1alpha1_vllmruntime.yaml create mode 100644 go.mod create mode 100644 go.sum create mode 100644 hack/boilerplate.go.txt create mode 100644 internal/controller/suite_test.go create mode 100644 internal/controller/vllmrouter_controller.go create mode 100644 internal/controller/vllmrouter_controller_test.go create mode 100644 internal/controller/vllmruntime_controller.go create mode 100644 internal/controller/vllmruntime_controller_test.go diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000..a3aab7af7 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,3 @@ +# More info: https://docs.docker.com/engine/reference/builder/#dockerignore-file +# Ignore build and test binaries. 
+bin/ diff --git a/.gitignore b/.gitignore index 4bc1bdf22..1341491df 100644 --- a/.gitignore +++ b/.gitignore @@ -147,3 +147,34 @@ terraform.rc # google cloud platform credentials.json + +.idea/ +.vscode/ +WORKSPACE +.DS_Store +# don't check in the build output of the book +docs/book/book/ + +# ignore auto-generated dir by `mdbook serve` +docs/book/src/docs + +# Editor temp files +*~ +\#*# +*.swp + +# Skip bazel dirs +/bazel-* + +# skip bin dirs +**/bin +**/testbin + +# skip .out files (coverage tests) +*.out + +# skip testdata go.sum, since it may have +# different result depending on go version +/testdata/**/go.sum +/docs/book/src/simple-external-plugin-tutorial/testdata/sampleexternalplugin/v1/bin +/testdata/**legacy** diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000..4ea148aed --- /dev/null +++ b/Dockerfile @@ -0,0 +1,33 @@ +# Build the manager binary +FROM docker.io/golang:1.24 AS builder +ARG TARGETOS +ARG TARGETARCH + +WORKDIR /workspace +# Copy the Go Modules manifests +COPY go.mod go.mod +COPY go.sum go.sum +# cache deps before building and copying source so that we don't need to re-download as much +# and so that source changes don't invalidate our downloaded layer +RUN go mod download + +# Copy the go source +COPY cmd/main.go cmd/main.go +COPY api/ api/ +COPY internal/ internal/ + +# Build +# GOARCH has no default value so that the binary is built for the platform of the host where the command +# was called. For example, if we call make docker-build in a local env running on Apple Silicon (M1), +# the docker BUILDPLATFORM arg will be linux/arm64, whereas for Apple x86 it will be linux/amd64. Therefore, +# by leaving it empty we can ensure that the container and the binary shipped in it have the same platform. 
+RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o manager cmd/main.go + +# Use distroless as minimal base image to package the manager binary +# Refer to https://github.com/GoogleContainerTools/distroless for more details +FROM gcr.io/distroless/static:nonroot +WORKDIR / +COPY --from=builder /workspace/manager . +USER 65532:65532 + +ENTRYPOINT ["/manager"] diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..a34e95dd6 --- /dev/null +++ b/Makefile @@ -0,0 +1,225 @@ +# Image URL to use all building/pushing image targets +IMG ?= controller:latest + +# Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set) +ifeq (,$(shell go env GOBIN)) +GOBIN=$(shell go env GOPATH)/bin +else +GOBIN=$(shell go env GOBIN) +endif + +# CONTAINER_TOOL defines the container tool to be used for building images. +# Be aware that the target commands are only tested with Docker which is +# scaffolded by default. However, you might want to replace it to use other +# tools. (i.e. podman) +CONTAINER_TOOL ?= docker + +# Setting SHELL to bash allows bash commands to be executed by recipes. +# Options are set to exit when a recipe line exits non-zero or a piped command fails. +SHELL = /usr/bin/env bash -o pipefail +.SHELLFLAGS = -ec + +.PHONY: all +all: build + +##@ General + +# The help target prints out all targets with their descriptions organized +# beneath their categories. The categories are represented by '##@' and the +# target descriptions by '##'. The awk command is responsible for reading the +# entire set of makefiles included in this invocation, looking for lines of the +# file as xyz: ## something, and then pretty-format the target and help. Then, +# if there's a line with ##@ something, that gets pretty-printed as a category. 
+# More info on the usage of ANSI control characters for terminal formatting: +# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters +# More info on the awk command: +# http://linuxcommand.org/lc3_adv_awk.php + +.PHONY: help +help: ## Display this help. + @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) + +##@ Development + +.PHONY: manifests +manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects. + $(CONTROLLER_GEN) rbac:roleName=manager-role crd webhook paths="./..." output:crd:artifacts:config=config/crd/bases + +.PHONY: generate +generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations. + $(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths="./..." + +.PHONY: fmt +fmt: ## Run go fmt against code. + go fmt ./... + +.PHONY: vet +vet: ## Run go vet against code. + go vet ./... + +.PHONY: test +test: manifests generate fmt vet setup-envtest ## Run tests. + KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test $$(go list ./... | grep -v /e2e) -coverprofile cover.out + +# TODO(user): To use a different vendor for e2e tests, modify the setup under 'test/e2e'. +# The default setup assumes Kind is pre-installed and builds/loads the Manager Docker image locally. +# CertManager is installed by default; skip with: +# - CERT_MANAGER_INSTALL_SKIP=true +.PHONY: test-e2e +test-e2e: manifests generate fmt vet ## Run the e2e tests. Expected an isolated environment using Kind. + @command -v $(KIND) >/dev/null 2>&1 || { \ + echo "Kind is not installed. Please install Kind manually."; \ + exit 1; \ + } + @$(KIND) get clusters | grep -q 'kind' || { \ + echo "No Kind cluster is running. 
Please start a Kind cluster before running the e2e tests."; \ + exit 1; \ + } + go test ./test/e2e/ -v -ginkgo.v + +.PHONY: lint +lint: golangci-lint ## Run golangci-lint linter + $(GOLANGCI_LINT) run + +.PHONY: lint-fix +lint-fix: golangci-lint ## Run golangci-lint linter and perform fixes + $(GOLANGCI_LINT) run --fix + +.PHONY: lint-config +lint-config: golangci-lint ## Verify golangci-lint linter configuration + $(GOLANGCI_LINT) config verify + +##@ Build + +.PHONY: build +build: manifests generate fmt vet ## Build manager binary. + go build -o bin/manager cmd/main.go + +.PHONY: run +run: manifests generate fmt vet ## Run a controller from your host. + go run ./cmd/main.go + +# If you wish to build the manager image targeting other platforms you can use the --platform flag. +# (e.g. docker build --platform linux/arm64). However, you must enable Docker BuildKit for it. +# More info: https://docs.docker.com/develop/develop-images/build_enhancements/ +.PHONY: docker-build +docker-build: ## Build docker image with the manager. + $(CONTAINER_TOOL) build -t ${IMG} . + +.PHONY: docker-push +docker-push: ## Push docker image with the manager. + $(CONTAINER_TOOL) push ${IMG} + +# PLATFORMS defines the target platforms for which the manager image is built, to provide support for multiple +# architectures (e.g. make docker-buildx IMG=myregistry/myoperator:0.0.1). To use this option you need to: +# - be able to use docker buildx. More info: https://docs.docker.com/build/buildx/ +# - have enabled BuildKit. More info: https://docs.docker.com/develop/develop-images/build_enhancements/ +# - be able to push the image to your registry (i.e. if you do not set a valid value via IMG=<myregistry/image:<tag>>, then the export will fail) +# To adequately provide solutions that are compatible with multiple platforms, you should consider using this option. 
+PLATFORMS ?= linux/arm64,linux/amd64,linux/s390x,linux/ppc64le +.PHONY: docker-buildx +docker-buildx: ## Build and push docker image for the manager for cross-platform support + # copy existing Dockerfile and insert --platform=${BUILDPLATFORM} into Dockerfile.cross, and preserve the original Dockerfile + sed -e '1 s/\(^FROM\)/FROM --platform=\$$\{BUILDPLATFORM\}/; t' -e ' 1,// s//FROM --platform=\$$\{BUILDPLATFORM\}/' Dockerfile > Dockerfile.cross + - $(CONTAINER_TOOL) buildx create --name production-stack-builder + $(CONTAINER_TOOL) buildx use production-stack-builder + - $(CONTAINER_TOOL) buildx build --push --platform=$(PLATFORMS) --tag ${IMG} -f Dockerfile.cross . + - $(CONTAINER_TOOL) buildx rm production-stack-builder + rm Dockerfile.cross + +.PHONY: build-installer +build-installer: manifests generate kustomize ## Generate a consolidated YAML with CRDs and deployment. + mkdir -p dist + cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG} + $(KUSTOMIZE) build config/default > dist/install.yaml + +##@ Deployment + +ifndef ignore-not-found + ignore-not-found = false +endif + +.PHONY: install +install: manifests kustomize ## Install CRDs into the K8s cluster specified in ~/.kube/config. + $(KUSTOMIZE) build config/crd | $(KUBECTL) apply -f - + +.PHONY: uninstall +uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion. + $(KUSTOMIZE) build config/crd | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f - + +.PHONY: deploy +deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config. + cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG} + $(KUSTOMIZE) build config/default | $(KUBECTL) apply -f - + +.PHONY: undeploy +undeploy: kustomize ## Undeploy controller from the K8s cluster specified in ~/.kube/config. 
Call with ignore-not-found=true to ignore resource not found errors during deletion. + $(KUSTOMIZE) build config/default | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f - + +##@ Dependencies + +## Location to install dependencies to +LOCALBIN ?= $(shell pwd)/bin +$(LOCALBIN): + mkdir -p $(LOCALBIN) + +## Tool Binaries +KUBECTL ?= kubectl +KIND ?= kind +KUSTOMIZE ?= $(LOCALBIN)/kustomize +CONTROLLER_GEN ?= $(LOCALBIN)/controller-gen +ENVTEST ?= $(LOCALBIN)/setup-envtest +GOLANGCI_LINT = $(LOCALBIN)/golangci-lint + +## Tool Versions +KUSTOMIZE_VERSION ?= v5.6.0 +CONTROLLER_TOOLS_VERSION ?= v0.17.2 +#ENVTEST_VERSION is the version of controller-runtime release branch to fetch the envtest setup script (i.e. release-0.20) +ENVTEST_VERSION ?= $(shell go list -m -f "{{ .Version }}" sigs.k8s.io/controller-runtime | awk -F'[v.]' '{printf "release-%d.%d", $$2, $$3}') +#ENVTEST_K8S_VERSION is the version of Kubernetes to use for setting up ENVTEST binaries (i.e. 1.31) +ENVTEST_K8S_VERSION ?= $(shell go list -m -f "{{ .Version }}" k8s.io/api | awk -F'[v.]' '{printf "1.%d", $$3}') +GOLANGCI_LINT_VERSION ?= v1.63.4 + +.PHONY: kustomize +kustomize: $(KUSTOMIZE) ## Download kustomize locally if necessary. +$(KUSTOMIZE): $(LOCALBIN) + $(call go-install-tool,$(KUSTOMIZE),sigs.k8s.io/kustomize/kustomize/v5,$(KUSTOMIZE_VERSION)) + +.PHONY: controller-gen +controller-gen: $(CONTROLLER_GEN) ## Download controller-gen locally if necessary. +$(CONTROLLER_GEN): $(LOCALBIN) + $(call go-install-tool,$(CONTROLLER_GEN),sigs.k8s.io/controller-tools/cmd/controller-gen,$(CONTROLLER_TOOLS_VERSION)) + +.PHONY: setup-envtest +setup-envtest: envtest ## Download the binaries required for ENVTEST in the local bin directory. + @echo "Setting up envtest binaries for Kubernetes version $(ENVTEST_K8S_VERSION)..." 
+ @$(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path || { \ + echo "Error: Failed to set up envtest binaries for version $(ENVTEST_K8S_VERSION)."; \ + exit 1; \ + } + +.PHONY: envtest +envtest: $(ENVTEST) ## Download setup-envtest locally if necessary. +$(ENVTEST): $(LOCALBIN) + $(call go-install-tool,$(ENVTEST),sigs.k8s.io/controller-runtime/tools/setup-envtest,$(ENVTEST_VERSION)) + +.PHONY: golangci-lint +golangci-lint: $(GOLANGCI_LINT) ## Download golangci-lint locally if necessary. +$(GOLANGCI_LINT): $(LOCALBIN) + $(call go-install-tool,$(GOLANGCI_LINT),github.com/golangci/golangci-lint/cmd/golangci-lint,$(GOLANGCI_LINT_VERSION)) + +# go-install-tool will 'go install' any package with custom target and name of binary, if it doesn't exist +# $1 - target path with name of binary +# $2 - package url which can be installed +# $3 - specific version of package +define go-install-tool +@[ -f "$(1)-$(3)" ] || { \ +set -e; \ +package=$(2)@$(3) ;\ +echo "Downloading $${package}" ;\ +rm -f $(1) || true ;\ +GOBIN=$(LOCALBIN) go install $${package} ;\ +mv $(1) $(1)-$(3) ;\ +} ;\ +ln -sf $(1)-$(3) $(1) +endef diff --git a/PROJECT b/PROJECT new file mode 100644 index 000000000..97a77c2eb --- /dev/null +++ b/PROJECT @@ -0,0 +1,29 @@ +# Code generated by tool. DO NOT EDIT. +# This file is used to track the info used to scaffold your project +# and allow the plugins properly work. 
+# More info: https://book.kubebuilder.io/reference/project-config.html +domain: vllm.ai +layout: +- go.kubebuilder.io/v4 +projectName: production-stack +repo: production-stack +resources: +- api: + crdVersion: v1 + namespaced: true + controller: true + domain: vllm.ai + group: serving + kind: VLLMRuntime + path: production-stack/api/v1alpha1 + version: v1alpha1 +- api: + crdVersion: v1 + namespaced: true + controller: true + domain: vllm.ai + group: serving + kind: Router + path: production-stack/api/v1alpha1 + version: v1alpha1 +version: "3" diff --git a/api/v1alpha1/groupversion_info.go b/api/v1alpha1/groupversion_info.go new file mode 100644 index 000000000..9d3c2bf50 --- /dev/null +++ b/api/v1alpha1/groupversion_info.go @@ -0,0 +1,36 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package v1alpha1 contains API Schema definitions for the serving v1alpha1 API group. +// +kubebuilder:object:generate=true +// +groupName=serving.vllm.ai +package v1alpha1 + +import ( + "k8s.io/apimachinery/pkg/runtime/schema" + "sigs.k8s.io/controller-runtime/pkg/scheme" +) + +var ( + // GroupVersion is group version used to register these objects. + GroupVersion = schema.GroupVersion{Group: "serving.vllm.ai", Version: "v1alpha1"} + + // SchemeBuilder is used to add go types to the GroupVersionKind scheme. + SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} + + // AddToScheme adds the types in this group-version to the given scheme. 
+ AddToScheme = SchemeBuilder.AddToScheme +) diff --git a/api/v1alpha1/vllmrouter_types.go b/api/v1alpha1/vllmrouter_types.go new file mode 100644 index 000000000..2bf632949 --- /dev/null +++ b/api/v1alpha1/vllmrouter_types.go @@ -0,0 +1,128 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! +// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. 
+ +// VLLMRouterSpec defines the desired state of VLLMRouter. +type VLLMRouterSpec struct { + // EnableRouter determines whether the router should be deployed. + // +kubebuilder:default=true + EnableRouter bool `json:"enableRouter,omitempty"` + + // Replicas specifies the number of router replicas. + // +kubebuilder:default=1 + Replicas int32 `json:"replicas,omitempty"` + + // ServiceDiscovery specifies the service discovery method (k8s or static). + // +kubebuilder:validation:Enum=k8s;static + // +kubebuilder:default=k8s + ServiceDiscovery string `json:"serviceDiscovery,omitempty"` + + // StaticBackends is required when using static service discovery. + // +kubebuilder:validation:RequiredWhen=ServiceDiscovery=static + StaticBackends string `json:"staticBackends,omitempty"` + + // StaticModels is required when using static service discovery. + // +kubebuilder:validation:RequiredWhen=ServiceDiscovery=static + StaticModels string `json:"staticModels,omitempty"` + + // RoutingLogic specifies the routing strategy (roundrobin or session). + // +kubebuilder:validation:Enum=roundrobin;session + // +kubebuilder:default=roundrobin + RoutingLogic string `json:"routingLogic,omitempty"` + + // SessionKey is the session key for session-based routing. + // +kubebuilder:validation:RequiredWhen=RoutingLogic=session + // +kubebuilder:default="" + SessionKey string `json:"sessionKey,omitempty"` + + // EngineScrapeInterval is the interval for collecting engine statistics. + EngineScrapeInterval string `json:"engineScrapeInterval,omitempty"` + + // RequestStatsWindow is the window for request statistics. + RequestStatsWindow string `json:"requestStatsWindow,omitempty"` + + // ExtraArgs lists additional router arguments. + ExtraArgs []string `json:"extraArgs,omitempty"` + + // NodeSelectorTerms lists the node selector terms for pod scheduling. + NodeSelectorTerms []corev1.NodeSelectorTerm `json:"nodeSelectorTerms,omitempty"` + + // ServiceAccountName is the service account name for the router pod. + ServiceAccountName string `json:"serviceAccountName,omitempty"` + + // Port is the container port for the router service. + // +kubebuilder:default=80 
+ Port int32 `json:"port,omitempty"` + + // Image is the container image configuration. + Image ImageSpec `json:"image"` + + // Resources defines the resource requirements. + Resources ResourceRequirements `json:"resources"` + + // Env lists the environment variables. + Env []EnvVar `json:"env,omitempty"` + + // VLLMApiKeySecret and VLLMApiKeyName configure the vLLM API key. + VLLMApiKeySecret corev1.LocalObjectReference `json:"vllmApiKeySecret,omitempty"` + VLLMApiKeyName string `json:"vllmApiKeyName,omitempty"` +} + +// VLLMRouterStatus defines the observed state of VLLMRouter. +type VLLMRouterStatus struct { + // Status is the router status. + Status string `json:"status,omitempty"` + + // LastUpdated is the last-updated timestamp. + LastUpdated metav1.Time `json:"lastUpdated,omitempty"` + + // ActiveRuntimes is the number of active runtimes. + ActiveRuntimes int32 `json:"activeRuntimes,omitempty"` +} + +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status + +// VLLMRouter is the Schema for the vllmrouters API. +type VLLMRouter struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec VLLMRouterSpec `json:"spec,omitempty"` + Status VLLMRouterStatus `json:"status,omitempty"` +} + +// +kubebuilder:object:root=true + +// VLLMRouterList contains a list of VLLMRouter. +type VLLMRouterList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []VLLMRouter `json:"items"` +} + +func init() { + SchemeBuilder.Register(&VLLMRouter{}, &VLLMRouterList{}) +} diff --git a/api/v1alpha1/vllmruntime_types.go b/api/v1alpha1/vllmruntime_types.go new file mode 100644 index 000000000..117a77463 --- /dev/null +++ b/api/v1alpha1/vllmruntime_types.go @@ -0,0 +1,184 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! +// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. + +// VLLMRuntimeSpec defines the desired state of VLLMRuntime +type VLLMRuntimeSpec struct { + // Model configuration + Model ModelSpec `json:"model"` + + // Enable chunked prefill + EnableChunkedPrefill bool `json:"enableChunkedPrefill,omitempty"` + + // Enable prefix caching + EnablePrefixCaching bool `json:"enablePrefixCaching,omitempty"` + + // Tensor parallel size + TensorParallelSize int32 `json:"tensorParallelSize,omitempty"` + + // GPU memory utilization + GpuMemoryUtilization string `json:"gpuMemoryUtilization,omitempty"` + + // Maximum number of LoRAs + MaxLoras int32 `json:"maxLoras,omitempty"` + + // LM Cache configuration + LMCacheConfig LMCacheConfig `json:"lmCacheConfig,omitempty"` + + // Extra arguments for vllm serve + ExtraArgs []string `json:"extraArgs,omitempty"` + + // Use V1 API + V1 bool `json:"v1,omitempty"` + + // Port for vLLM server + // +kubebuilder:default=8000 + Port int32 `json:"port,omitempty"` + + // Environment variables + Env []EnvVar `json:"env,omitempty"` + + // Resource requirements + Resources ResourceRequirements `json:"resources"` + + // Image configuration + Image ImageSpec `json:"image"` + + // HuggingFace token secret + HFTokenSecret corev1.LocalObjectReference `json:"hfTokenSecret,omitempty"` + // +kubebuilder:default=token + // 
+kubebuilder:validation:RequiredWhen=HFTokenSecret.Name!="" + HFTokenName string `json:"hfTokenName,omitempty"` + + // Replicas + // +kubebuilder:default=1 + Replicas int32 `json:"replicas,omitempty"` + + // Deploy strategy + // +kubebuilder:validation:Enum=RollingUpdate;Recreate + // +kubebuilder:default=RollingUpdate + DeployStrategy string `json:"deploymentStrategy,omitempty"` +} + +// ModelSpec defines the model configuration +type ModelSpec struct { + // Model URL + ModelURL string `json:"modelURL"` + + // Enable LoRA + EnableLoRA bool `json:"enableLoRA,omitempty"` + + // Enable tool + EnableTool bool `json:"enableTool,omitempty"` + + // Tool call parser + ToolCallParser string `json:"toolCallParser,omitempty"` + + // Maximum model length + MaxModelLen int32 `json:"maxModelLen,omitempty"` + + // Data type + DType string `json:"dtype,omitempty"` + + // Maximum number of sequences + MaxNumSeqs int32 `json:"maxNumSeqs,omitempty"` +} + +// LMCacheConfig defines the LM Cache configuration +type LMCacheConfig struct { + // Enabled enables LM Cache + // +kubebuilder:default=false + Enabled bool `json:"enabled,omitempty"` + + // CPUOffloadingBufferSize is the size of the CPU offloading buffer + // +kubebuilder:default="4Gi" + CPUOffloadingBufferSize string `json:"cpuOffloadingBufferSize,omitempty"` + + // DiskOffloadingBufferSize is the size of the disk offloading buffer + // +kubebuilder:default="8Gi" + DiskOffloadingBufferSize string `json:"diskOffloadingBufferSize,omitempty"` + + // RemoteURL is the URL of the remote cache server + RemoteURL string `json:"remoteUrl,omitempty"` + + // RemoteSerde is the serialization format for the remote cache + RemoteSerde string `json:"remoteSerde,omitempty"` +} + +// EnvVar represents an environment variable +type EnvVar struct { + Name string `json:"name"` + Value string `json:"value"` +} + +// ResourceRequirements defines the resource requirements +type ResourceRequirements struct { + CPU string `json:"cpu,omitempty"` + Memory 
string `json:"memory,omitempty"` + GPU string `json:"gpu,omitempty"` +} + +// ImageSpec defines the container image configuration +type ImageSpec struct { + Registry string `json:"registry"` + Name string `json:"name"` + PullPolicy string `json:"pullPolicy,omitempty"` + PullSecretName string `json:"pullSecretName,omitempty"` +} + +// VLLMRuntimeStatus defines the observed state of VLLMRuntime +type VLLMRuntimeStatus struct { + // Model status + ModelStatus string `json:"modelStatus,omitempty"` + + // Last updated timestamp + LastUpdated metav1.Time `json:"lastUpdated,omitempty"` +} + +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +kubebuilder:resource:shortName=vr + +// VLLMRuntime is the Schema for the vllmruntimes API +type VLLMRuntime struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec VLLMRuntimeSpec `json:"spec,omitempty"` + Status VLLMRuntimeStatus `json:"status,omitempty"` +} + +// +kubebuilder:object:root=true + +// VLLMRuntimeList contains a list of VLLMRuntime +type VLLMRuntimeList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []VLLMRuntime `json:"items"` +} + +func init() { + SchemeBuilder.Register(&VLLMRuntime{}, &VLLMRuntimeList{}) +} diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go new file mode 100644 index 000000000..51e91ba0a --- /dev/null +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -0,0 +1,316 @@ +//go:build !ignore_autogenerated + +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by controller-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + "k8s.io/api/core/v1" + runtime "k8s.io/apimachinery/pkg/runtime" +) + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *EnvVar) DeepCopyInto(out *EnvVar) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EnvVar. +func (in *EnvVar) DeepCopy() *EnvVar { + if in == nil { + return nil + } + out := new(EnvVar) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ImageSpec) DeepCopyInto(out *ImageSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ImageSpec. +func (in *ImageSpec) DeepCopy() *ImageSpec { + if in == nil { + return nil + } + out := new(ImageSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *LMCacheConfig) DeepCopyInto(out *LMCacheConfig) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LMCacheConfig. +func (in *LMCacheConfig) DeepCopy() *LMCacheConfig { + if in == nil { + return nil + } + out := new(LMCacheConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ModelSpec) DeepCopyInto(out *ModelSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelSpec. 
+func (in *ModelSpec) DeepCopy() *ModelSpec { + if in == nil { + return nil + } + out := new(ModelSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ResourceRequirements) DeepCopyInto(out *ResourceRequirements) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ResourceRequirements. +func (in *ResourceRequirements) DeepCopy() *ResourceRequirements { + if in == nil { + return nil + } + out := new(ResourceRequirements) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *VLLMRouter) DeepCopyInto(out *VLLMRouter) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VLLMRouter. +func (in *VLLMRouter) DeepCopy() *VLLMRouter { + if in == nil { + return nil + } + out := new(VLLMRouter) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *VLLMRouter) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *VLLMRouterList) DeepCopyInto(out *VLLMRouterList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]VLLMRouter, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VLLMRouterList. +func (in *VLLMRouterList) DeepCopy() *VLLMRouterList { + if in == nil { + return nil + } + out := new(VLLMRouterList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *VLLMRouterList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *VLLMRouterSpec) DeepCopyInto(out *VLLMRouterSpec) { + *out = *in + if in.ExtraArgs != nil { + in, out := &in.ExtraArgs, &out.ExtraArgs + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.NodeSelectorTerms != nil { + in, out := &in.NodeSelectorTerms, &out.NodeSelectorTerms + *out = make([]v1.NodeSelectorTerm, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + out.Image = in.Image + out.Resources = in.Resources + if in.Env != nil { + in, out := &in.Env, &out.Env + *out = make([]EnvVar, len(*in)) + copy(*out, *in) + } + out.VLLMApiKeySecret = in.VLLMApiKeySecret +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VLLMRouterSpec. +func (in *VLLMRouterSpec) DeepCopy() *VLLMRouterSpec { + if in == nil { + return nil + } + out := new(VLLMRouterSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *VLLMRouterStatus) DeepCopyInto(out *VLLMRouterStatus) { + *out = *in + in.LastUpdated.DeepCopyInto(&out.LastUpdated) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VLLMRouterStatus. +func (in *VLLMRouterStatus) DeepCopy() *VLLMRouterStatus { + if in == nil { + return nil + } + out := new(VLLMRouterStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *VLLMRuntime) DeepCopyInto(out *VLLMRuntime) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VLLMRuntime. +func (in *VLLMRuntime) DeepCopy() *VLLMRuntime { + if in == nil { + return nil + } + out := new(VLLMRuntime) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *VLLMRuntime) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *VLLMRuntimeList) DeepCopyInto(out *VLLMRuntimeList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]VLLMRuntime, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VLLMRuntimeList. 
+func (in *VLLMRuntimeList) DeepCopy() *VLLMRuntimeList { + if in == nil { + return nil + } + out := new(VLLMRuntimeList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *VLLMRuntimeList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *VLLMRuntimeSpec) DeepCopyInto(out *VLLMRuntimeSpec) { + *out = *in + out.Model = in.Model + out.LMCacheConfig = in.LMCacheConfig + if in.ExtraArgs != nil { + in, out := &in.ExtraArgs, &out.ExtraArgs + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.Env != nil { + in, out := &in.Env, &out.Env + *out = make([]EnvVar, len(*in)) + copy(*out, *in) + } + out.Resources = in.Resources + out.Image = in.Image + out.HFTokenSecret = in.HFTokenSecret +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VLLMRuntimeSpec. +func (in *VLLMRuntimeSpec) DeepCopy() *VLLMRuntimeSpec { + if in == nil { + return nil + } + out := new(VLLMRuntimeSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *VLLMRuntimeStatus) DeepCopyInto(out *VLLMRuntimeStatus) { + *out = *in + in.LastUpdated.DeepCopyInto(&out.LastUpdated) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VLLMRuntimeStatus. +func (in *VLLMRuntimeStatus) DeepCopy() *VLLMRuntimeStatus { + if in == nil { + return nil + } + out := new(VLLMRuntimeStatus) + in.DeepCopyInto(out) + return out +} diff --git a/cmd/main.go b/cmd/main.go new file mode 100644 index 000000000..94c41e44b --- /dev/null +++ b/cmd/main.go @@ -0,0 +1,252 @@ +/* +Copyright 2025. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package main + +import ( + "crypto/tls" + "flag" + "os" + "path/filepath" + + // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) + // to ensure that exec-entrypoint and run can make use of them. + _ "k8s.io/client-go/plugin/pkg/client/auth" + + "k8s.io/apimachinery/pkg/runtime" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/certwatcher" + "sigs.k8s.io/controller-runtime/pkg/healthz" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + "sigs.k8s.io/controller-runtime/pkg/metrics/filters" + metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" + "sigs.k8s.io/controller-runtime/pkg/webhook" + + servingv1alpha1 "production-stack/api/v1alpha1" + "production-stack/internal/controller" + // +kubebuilder:scaffold:imports +) + +var ( + scheme = runtime.NewScheme() + setupLog = ctrl.Log.WithName("setup") +) + +func init() { + utilruntime.Must(clientgoscheme.AddToScheme(scheme)) + + utilruntime.Must(servingv1alpha1.AddToScheme(scheme)) + // +kubebuilder:scaffold:scheme +} + +// nolint:gocyclo +func main() { + var metricsAddr string + var metricsCertPath, metricsCertName, metricsCertKey string + var webhookCertPath, webhookCertName, webhookCertKey string + var enableLeaderElection bool + var probeAddr string + var secureMetrics bool + var enableHTTP2 bool + var tlsOpts 
[]func(*tls.Config) + flag.StringVar(&metricsAddr, "metrics-bind-address", "0", "The address the metrics endpoint binds to. "+ + "Use :8443 for HTTPS or :8080 for HTTP, or leave as 0 to disable the metrics service.") + flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.") + flag.BoolVar(&enableLeaderElection, "leader-elect", false, + "Enable leader election for controller manager. "+ + "Enabling this will ensure there is only one active controller manager.") + flag.BoolVar(&secureMetrics, "metrics-secure", true, + "If set, the metrics endpoint is served securely via HTTPS. Use --metrics-secure=false to use HTTP instead.") + flag.StringVar(&webhookCertPath, "webhook-cert-path", "", "The directory that contains the webhook certificate.") + flag.StringVar(&webhookCertName, "webhook-cert-name", "tls.crt", "The name of the webhook certificate file.") + flag.StringVar(&webhookCertKey, "webhook-cert-key", "tls.key", "The name of the webhook key file.") + flag.StringVar(&metricsCertPath, "metrics-cert-path", "", + "The directory that contains the metrics server certificate.") + flag.StringVar(&metricsCertName, "metrics-cert-name", "tls.crt", "The name of the metrics server certificate file.") + flag.StringVar(&metricsCertKey, "metrics-cert-key", "tls.key", "The name of the metrics server key file.") + flag.BoolVar(&enableHTTP2, "enable-http2", false, + "If set, HTTP/2 will be enabled for the metrics and webhook servers") + opts := zap.Options{ + Development: true, + } + opts.BindFlags(flag.CommandLine) + flag.Parse() + + ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts))) + + // if the enable-http2 flag is false (the default), http/2 should be disabled + // due to its vulnerabilities. More specifically, disabling http/2 will + // prevent from being vulnerable to the HTTP/2 Stream Cancellation and + // Rapid Reset CVEs. 
For more information see: + // - https://github.com/advisories/GHSA-qppj-fm5r-hxr3 + // - https://github.com/advisories/GHSA-4374-p667-p6c8 + disableHTTP2 := func(c *tls.Config) { + setupLog.Info("disabling http/2") + c.NextProtos = []string{"http/1.1"} + } + + if !enableHTTP2 { + tlsOpts = append(tlsOpts, disableHTTP2) + } + + // Create watchers for metrics and webhooks certificates + var metricsCertWatcher, webhookCertWatcher *certwatcher.CertWatcher + + // Initial webhook TLS options + webhookTLSOpts := tlsOpts + + if len(webhookCertPath) > 0 { + setupLog.Info("Initializing webhook certificate watcher using provided certificates", + "webhook-cert-path", webhookCertPath, "webhook-cert-name", webhookCertName, "webhook-cert-key", webhookCertKey) + + var err error + webhookCertWatcher, err = certwatcher.New( + filepath.Join(webhookCertPath, webhookCertName), + filepath.Join(webhookCertPath, webhookCertKey), + ) + if err != nil { + setupLog.Error(err, "Failed to initialize webhook certificate watcher") + os.Exit(1) + } + + webhookTLSOpts = append(webhookTLSOpts, func(config *tls.Config) { + config.GetCertificate = webhookCertWatcher.GetCertificate + }) + } + + webhookServer := webhook.NewServer(webhook.Options{ + TLSOpts: webhookTLSOpts, + }) + + // Metrics endpoint is enabled in 'config/default/kustomization.yaml'. The Metrics options configure the server. + // More info: + // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.20.4/pkg/metrics/server + // - https://book.kubebuilder.io/reference/metrics.html + metricsServerOptions := metricsserver.Options{ + BindAddress: metricsAddr, + SecureServing: secureMetrics, + TLSOpts: tlsOpts, + } + + if secureMetrics { + // FilterProvider is used to protect the metrics endpoint with authn/authz. + // These configurations ensure that only authorized users and service accounts + // can access the metrics endpoint. The RBAC are configured in 'config/rbac/kustomization.yaml'. 
More info: + // https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.20.4/pkg/metrics/filters#WithAuthenticationAndAuthorization + metricsServerOptions.FilterProvider = filters.WithAuthenticationAndAuthorization + } + + // If the certificate is not specified, controller-runtime will automatically + // generate self-signed certificates for the metrics server. While convenient for development and testing, + // this setup is not recommended for production. + // + // TODO(user): If you enable certManager, uncomment the following lines: + // - [METRICS-WITH-CERTS] at config/default/kustomization.yaml to generate and use certificates + // managed by cert-manager for the metrics server. + // - [PROMETHEUS-WITH-CERTS] at config/prometheus/kustomization.yaml for TLS certification. + if len(metricsCertPath) > 0 { + setupLog.Info("Initializing metrics certificate watcher using provided certificates", + "metrics-cert-path", metricsCertPath, "metrics-cert-name", metricsCertName, "metrics-cert-key", metricsCertKey) + + var err error + metricsCertWatcher, err = certwatcher.New( + filepath.Join(metricsCertPath, metricsCertName), + filepath.Join(metricsCertPath, metricsCertKey), + ) + if err != nil { + setupLog.Error(err, "Failed to initialize metrics certificate watcher") + os.Exit(1) + } + + metricsServerOptions.TLSOpts = append(metricsServerOptions.TLSOpts, func(config *tls.Config) { + config.GetCertificate = metricsCertWatcher.GetCertificate + }) + } + + mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ + Scheme: scheme, + Metrics: metricsServerOptions, + WebhookServer: webhookServer, + HealthProbeBindAddress: probeAddr, + LeaderElection: enableLeaderElection, + LeaderElectionID: "4549d26f.vllm.ai", + // LeaderElectionReleaseOnCancel defines if the leader should step down voluntarily + // when the Manager ends. This requires the binary to immediately end when the + // Manager is stopped, otherwise, this setting is unsafe.
Setting this significantly + // speeds up voluntary leader transitions as the new leader doesn't have to wait + // LeaseDuration time first. + // + // In the default scaffold provided, the program ends immediately after + // the manager stops, so it would be fine to enable this option. However, + // if you perform, or intend to perform, any operations such as cleanups + // after the manager stops, then its usage might be unsafe. + // LeaderElectionReleaseOnCancel: true, + }) + if err != nil { + setupLog.Error(err, "unable to start manager") + os.Exit(1) + } + + if err = (&controller.VLLMRouterReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "VLLMRouter") + os.Exit(1) + } + + if err = (&controller.VLLMRuntimeReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "VLLMRuntime") + os.Exit(1) + } + // +kubebuilder:scaffold:builder + + if metricsCertWatcher != nil { + setupLog.Info("Adding metrics certificate watcher to manager") + if err := mgr.Add(metricsCertWatcher); err != nil { + setupLog.Error(err, "unable to add metrics certificate watcher to manager") + os.Exit(1) + } + } + + if webhookCertWatcher != nil { + setupLog.Info("Adding webhook certificate watcher to manager") + if err := mgr.Add(webhookCertWatcher); err != nil { + setupLog.Error(err, "unable to add webhook certificate watcher to manager") + os.Exit(1) + } + } + + if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { + setupLog.Error(err, "unable to set up health check") + os.Exit(1) + } + if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil { + setupLog.Error(err, "unable to set up ready check") + os.Exit(1) + } + + setupLog.Info("starting manager") + if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { + setupLog.Error(err,
"problem running manager") + os.Exit(1) + } +} diff --git a/config/crd/bases/production-stack.vllm.ai_staticroutes.yaml b/config/crd/bases/production-stack.vllm.ai_staticroutes.yaml new file mode 100644 index 000000000..cd6dd48bd --- /dev/null +++ b/config/crd/bases/production-stack.vllm.ai_staticroutes.yaml @@ -0,0 +1,218 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.17.2 + name: staticroutes.production-stack.vllm.ai +spec: + group: production-stack.vllm.ai + names: + kind: StaticRoute + listKind: StaticRouteList + plural: staticroutes + singular: staticroute + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: StaticRoute is the Schema for the staticroutes API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: StaticRouteSpec defines the desired state of StaticRoute + properties: + configMapName: + description: ConfigMapName is the name of the ConfigMap to create + with the dynamic config + type: string + healthCheck: + description: HealthCheck defines the health check configuration for + the router + properties: + failureThreshold: + default: 3 + description: Minimum consecutive failures for the probe to be + considered failed + format: int32 + minimum: 1 + type: integer + periodSeconds: + default: 10 + description: Number of seconds between probe attempts + format: int32 + minimum: 1 + type: integer + successThreshold: + default: 1 + description: Minimum consecutive successes for the probe to be + considered successful + format: int32 + minimum: 1 + type: integer + timeoutSeconds: + default: 5 + description: Number of seconds after which the probe times out + format: int32 + minimum: 1 + type: integer + type: object + routerRef: + description: RouterRef is a reference to the router service + properties: + apiVersion: + description: API version of the referent. + type: string + fieldPath: + description: |- + If referring to a piece of an object instead of an entire object, this string + should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. + For example, if the object reference is to a container within a pod, this would take on a value like: + "spec.containers{name}" (where "name" refers to the name of the container that triggered + the event) or if no container name is specified "spec.containers[2]" (container with + index 2 in this pod). This syntax is chosen only to have some well-defined way of + referencing a part of an object. + type: string + kind: + description: |- + Kind of the referent. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + namespace: + description: |- + Namespace of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ + type: string + resourceVersion: + description: |- + Specific resourceVersion to which this reference is made, if any. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency + type: string + uid: + description: |- + UID of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids + type: string + type: object + x-kubernetes-map-type: atomic + routingLogic: + default: roundrobin + description: RoutingLogic specifies the routing logic to use + enum: + - roundrobin + type: string + serviceDiscovery: + default: static + description: ServiceDiscovery specifies the service discovery method + enum: + - static + type: string + staticBackends: + description: StaticBackends is a comma-separated list of backend URLs + type: string + staticModels: + description: StaticModels is a comma-separated list of model names + type: string + required: + - routingLogic + - serviceDiscovery + - staticBackends + - staticModels + type: object + status: + description: StaticRouteStatus defines the observed state of StaticRoute + properties: + conditions: + description: Conditions represent the latest available observations + of the StaticRoute's state + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. 
+ This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. 
+ maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + configMapRef: + description: ConfigMapRef is a reference to the created ConfigMap + type: string + lastAppliedTime: + description: LastAppliedTime is the last time the configuration was + applied to the router + format: date-time + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/crd/bases/serving.vllm.ai_vllmrouters.yaml b/config/crd/bases/serving.vllm.ai_vllmrouters.yaml new file mode 100644 index 000000000..7445c2e4c --- /dev/null +++ b/config/crd/bases/serving.vllm.ai_vllmrouters.yaml @@ -0,0 +1,251 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.17.2 + name: vllmrouters.serving.vllm.ai +spec: + group: serving.vllm.ai + names: + kind: VLLMRouter + listKind: VLLMRouterList + plural: vllmrouters + singular: vllmrouter + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: VLLMRouter is the Schema for the vllmrouters API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: VLLMRouterSpec defines the desired state of VLLMRouter + properties: + enableRouter: + default: true + description: EnableRouter determines if the router should be deployed + type: boolean + engineScrapeInterval: + description: EngineScrapeInterval for collecting engine statistics + type: string + env: + description: Environment variables + items: + description: EnvVar represents an environment variable + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + extraArgs: + description: ExtraArgs for additional router arguments + items: + type: string + type: array + image: + description: Image configuration + properties: + name: + type: string + pullPolicy: + type: string + pullSecretName: + type: string + registry: + type: string + required: + - name + - registry + type: object + nodeSelectorTerms: + description: NodeSelectorTerms for pod scheduling + items: + description: |- + A null or empty node selector term matches no objects. The requirements of + them are ANDed. + The TopologySelectorTerm type implements a subset of the NodeSelectorTerm. + properties: + matchExpressions: + description: A list of node selector requirements by node's + labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, Not In, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or Not In, + the values array must be non-empty. 
If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: A list of node selector requirements by node's + fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, Not In, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or Not In, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. 
+ items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + type: array + port: + default: 80 + description: ContainerPort for the router service + format: int32 + type: integer + replicas: + default: 1 + description: Replicas specifies the number of router replicas + format: int32 + type: integer + requestStatsWindow: + description: RequestStatsWindow for request statistics + type: string + resources: + description: Resource requirements + properties: + cpu: + type: string + gpu: + type: string + memory: + type: string + type: object + routingLogic: + default: roundrobin + description: RoutingLogic specifies the routing strategy + enum: + - roundrobin + - session + type: string + serviceAccountName: + description: ServiceAccountName for the router pod + type: string + serviceDiscovery: + default: k8s + description: ServiceDiscovery specifies the service discovery method + (k8s or static) + enum: + - k8s + - static + type: string + sessionKey: + default: "" + description: SessionKey for session-based routing + type: string + staticBackends: + description: StaticBackends is required when using static service + discovery + type: string + staticModels: + description: StaticModels is required when using static service discovery + type: string + vllmApiKeyName: + type: string + vllmApiKeySecret: + description: VLLM API Key configuration + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + required: + - image + - resources + type: object + status: + description: VLLMRouterStatus defines the observed state of VLLMRouter + properties: + activeRuntimes: + description: Number of active runtimes + format: int32 + type: integer + lastUpdated: + description: Last updated timestamp + format: date-time + type: string + status: + description: Router status + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/crd/bases/serving.vllm.ai_vllmruntimes.yaml b/config/crd/bases/serving.vllm.ai_vllmruntimes.yaml new file mode 100644 index 000000000..311dc82a8 --- /dev/null +++ b/config/crd/bases/serving.vllm.ai_vllmruntimes.yaml @@ -0,0 +1,216 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.17.2 + name: vllmruntimes.serving.vllm.ai +spec: + group: serving.vllm.ai + names: + kind: VLLMRuntime + listKind: VLLMRuntimeList + plural: vllmruntimes + shortNames: + - vr + singular: vllmruntime + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: VLLMRuntime is the Schema for the vllmruntimes API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: VLLMRuntimeSpec defines the desired state of VLLMRuntime + properties: + deploymentStrategy: + default: RollingUpdate + description: Deploy strategy + enum: + - RollingUpdate + - Recreate + type: string + enableChunkedPrefill: + description: Enable chunked prefill + type: boolean + enablePrefixCaching: + description: Enable prefix caching + type: boolean + env: + description: Environment variables + items: + description: EnvVar represents an environment variable + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + extraArgs: + description: Extra arguments for vllm serve + items: + type: string + type: array + gpuMemoryUtilization: + description: GPU memory utilization + type: string + hfTokenName: + default: token + type: string + hfTokenSecret: + description: HuggingFace token secret + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + image: + description: Image configuration + properties: + name: + type: string + pullPolicy: + type: string + pullSecretName: + type: string + registry: + type: string + required: + - name + - registry + type: object + lmCacheConfig: + description: LM Cache configuration + properties: + cpuOffloadingBufferSize: + default: 4Gi + description: CPUOffloadingBufferSize is the size of the CPU offloading + buffer + type: string + diskOffloadingBufferSize: + default: 8Gi + description: DiskOffloadingBufferSize is the size of the disk + offloading buffer + type: string + enabled: + default: false + description: Enabled enables LM Cache + type: boolean + remoteSerde: + description: RemoteSerde is the serialization format for the remote + cache + type: string + remoteUrl: + description: RemoteURL is the URL of the remote cache server + type: string + type: object + maxLoras: + description: Maximum number of LoRAs + format: int32 + type: integer + model: + description: Model configuration + properties: + dtype: + description: Data type + type: string + enableLoRA: + description: Enable LoRA + type: boolean + enableTool: + description: Enable tool + type: boolean + maxModelLen: + description: Maximum model length + format: int32 + type: integer + maxNumSeqs: + description: Maximum number of sequences + format: int32 + type: integer + modelURL: + description: Model URL + type: string + toolCallParser: + description: Tool call parser + type: string + required: + - modelURL + type: object + port: + default: 8000 + description: Port for vLLM server + format: int32 + type: integer + replicas: + default: 1 + description: Replicas + format: int32 + type: integer + resources: + description: Resource requirements + properties: + cpu: + type: string + gpu: + type: string + memory: + type: string + type: object + tensorParallelSize: + 
description: Tensor parallel size + format: int32 + type: integer + v1: + description: Use V1 API + type: boolean + required: + - image + - model + - resources + type: object + status: + description: VLLMRuntimeStatus defines the observed state of VLLMRuntime + properties: + lastUpdated: + description: Last updated timestamp + format: date-time + type: string + modelStatus: + description: Model status + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml new file mode 100644 index 000000000..500f2b34a --- /dev/null +++ b/config/crd/kustomization.yaml @@ -0,0 +1,17 @@ +# This kustomization.yaml is not intended to be run by itself, +# since it depends on service name and namespace that are out of this kustomize package. +# It should be run by config/default +resources: +- bases/serving.vllm.ai_vllmruntimes.yaml +- bases/serving.vllm.ai_vllmrouters.yaml +# +kubebuilder:scaffold:crdkustomizeresource + +patches: +# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix. +# patches here are for enabling the conversion webhook for each CRD +# +kubebuilder:scaffold:crdkustomizewebhookpatch + +# [WEBHOOK] To enable webhook, uncomment the following section +# the following config is for teaching kustomize how to do kustomization for CRDs. 
+#configurations: +#- kustomizeconfig.yaml diff --git a/config/crd/kustomizeconfig.yaml b/config/crd/kustomizeconfig.yaml new file mode 100644 index 000000000..ec5c150a9 --- /dev/null +++ b/config/crd/kustomizeconfig.yaml @@ -0,0 +1,19 @@ +# This file is for teaching kustomize how to substitute name and namespace reference in CRD +nameReference: +- kind: Service + version: v1 + fieldSpecs: + - kind: CustomResourceDefinition + version: v1 + group: apiextensions.k8s.io + path: spec/conversion/webhook/clientConfig/service/name + +namespace: +- kind: CustomResourceDefinition + version: v1 + group: apiextensions.k8s.io + path: spec/conversion/webhook/clientConfig/service/namespace + create: false + +varReference: +- path: metadata/annotations diff --git a/config/default/cert_metrics_manager_patch.yaml b/config/default/cert_metrics_manager_patch.yaml new file mode 100644 index 000000000..d97501553 --- /dev/null +++ b/config/default/cert_metrics_manager_patch.yaml @@ -0,0 +1,30 @@ +# This patch adds the args, volumes, and ports to allow the manager to use the metrics-server certs. 
+ +# Add the volumeMount for the metrics-server certs +- op: add + path: /spec/template/spec/containers/0/volumeMounts/- + value: + mountPath: /tmp/k8s-metrics-server/metrics-certs + name: metrics-certs + readOnly: true + +# Add the --metrics-cert-path argument for the metrics server +- op: add + path: /spec/template/spec/containers/0/args/- + value: --metrics-cert-path=/tmp/k8s-metrics-server/metrics-certs + +# Add the metrics-server certs volume configuration +- op: add + path: /spec/template/spec/volumes/- + value: + name: metrics-certs + secret: + secretName: metrics-server-cert + optional: false + items: + - key: ca.crt + path: ca.crt + - key: tls.crt + path: tls.crt + - key: tls.key + path: tls.key diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml new file mode 100644 index 000000000..db3e47347 --- /dev/null +++ b/config/default/kustomization.yaml @@ -0,0 +1,234 @@ +# Adds namespace to all resources. +namespace: production-stack-system + +# Value of this field is prepended to the +# names of all resources, e.g. a deployment named +# "wordpress" becomes "alices-wordpress". +# Note that it should also match with the prefix (text before '-') of the namespace +# field above. +namePrefix: production-stack- + +# Labels to add to all resources and selectors. +#labels: +#- includeSelectors: true +# pairs: +# someName: someValue + +resources: +- ../crd +- ../rbac +- ../manager +# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in +# crd/kustomization.yaml +#- ../webhook +# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required. +#- ../certmanager +# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. +#- ../prometheus +# [METRICS] Expose the controller manager metrics service. +- metrics_service.yaml +# [NETWORK POLICY] Protect the /metrics endpoint and Webhook Server with NetworkPolicy. 
+# Only Pod(s) running a namespace labeled with 'metrics: enabled' will be able to gather the metrics. +# Only CR(s) which requires webhooks and are applied on namespaces labeled with 'webhooks: enabled' will +# be able to communicate with the Webhook Server. +#- ../network-policy + +# Uncomment the patches line if you enable Metrics +patches: +# [METRICS] The following patch will enable the metrics endpoint using HTTPS and the port :8443. +# More info: https://book.kubebuilder.io/reference/metrics +- path: manager_metrics_patch.yaml + target: + kind: Deployment + +# Uncomment the patches line if you enable Metrics and CertManager +# [METRICS-WITH-CERTS] To enable metrics protected with certManager, uncomment the following line. +# This patch will protect the metrics with certManager self-signed certs. +#- path: cert_metrics_manager_patch.yaml +# target: +# kind: Deployment + +# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in +# crd/kustomization.yaml +#- path: manager_webhook_patch.yaml +# target: +# kind: Deployment + +# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix. +# Uncomment the following replacements to add the cert-manager CA injection annotations +#replacements: +# - source: # Uncomment the following block to enable certificates for metrics +# kind: Service +# version: v1 +# name: controller-manager-metrics-service +# fieldPath: metadata.name +# targets: +# - select: +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: metrics-certs +# fieldPaths: +# - spec.dnsNames.0 +# - spec.dnsNames.1 +# options: +# delimiter: '.' 
+# index: 0 +# create: true +# - select: # Uncomment the following to set the Service name for TLS config in Prometheus ServiceMonitor +# kind: ServiceMonitor +# group: monitoring.coreos.com +# version: v1 +# name: controller-manager-metrics-monitor +# fieldPaths: +# - spec.endpoints.0.tlsConfig.serverName +# options: +# delimiter: '.' +# index: 0 +# create: true +# +# - source: +# kind: Service +# version: v1 +# name: controller-manager-metrics-service +# fieldPath: metadata.namespace +# targets: +# - select: +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: metrics-certs +# fieldPaths: +# - spec.dnsNames.0 +# - spec.dnsNames.1 +# options: +# delimiter: '.' +# index: 1 +# create: true +# - select: # Uncomment the following to set the Service namespace for TLS in Prometheus ServiceMonitor +# kind: ServiceMonitor +# group: monitoring.coreos.com +# version: v1 +# name: controller-manager-metrics-monitor +# fieldPaths: +# - spec.endpoints.0.tlsConfig.serverName +# options: +# delimiter: '.' +# index: 1 +# create: true +# +# - source: # Uncomment the following block if you have any webhook +# kind: Service +# version: v1 +# name: webhook-service +# fieldPath: .metadata.name # Name of the service +# targets: +# - select: +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: serving-cert +# fieldPaths: +# - .spec.dnsNames.0 +# - .spec.dnsNames.1 +# options: +# delimiter: '.' +# index: 0 +# create: true +# - source: +# kind: Service +# version: v1 +# name: webhook-service +# fieldPath: .metadata.namespace # Namespace of the service +# targets: +# - select: +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: serving-cert +# fieldPaths: +# - .spec.dnsNames.0 +# - .spec.dnsNames.1 +# options: +# delimiter: '.' 
+# index: 1 +# create: true +# +# - source: # Uncomment the following block if you have a ValidatingWebhook (--programmatic-validation) +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: serving-cert # This name should match the one in certificate.yaml +# fieldPath: .metadata.namespace # Namespace of the certificate CR +# targets: +# - select: +# kind: ValidatingWebhookConfiguration +# fieldPaths: +# - .metadata.annotations.[cert-manager.io/inject-ca-from] +# options: +# delimiter: '/' +# index: 0 +# create: true +# - source: +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: serving-cert +# fieldPath: .metadata.name +# targets: +# - select: +# kind: ValidatingWebhookConfiguration +# fieldPaths: +# - .metadata.annotations.[cert-manager.io/inject-ca-from] +# options: +# delimiter: '/' +# index: 1 +# create: true +# +# - source: # Uncomment the following block if you have a DefaultingWebhook (--defaulting ) +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: serving-cert +# fieldPath: .metadata.namespace # Namespace of the certificate CR +# targets: +# - select: +# kind: MutatingWebhookConfiguration +# fieldPaths: +# - .metadata.annotations.[cert-manager.io/inject-ca-from] +# options: +# delimiter: '/' +# index: 0 +# create: true +# - source: +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: serving-cert +# fieldPath: .metadata.name +# targets: +# - select: +# kind: MutatingWebhookConfiguration +# fieldPaths: +# - .metadata.annotations.[cert-manager.io/inject-ca-from] +# options: +# delimiter: '/' +# index: 1 +# create: true +# +# - source: # Uncomment the following block if you have a ConversionWebhook (--conversion) +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: serving-cert +# fieldPath: .metadata.namespace # Namespace of the certificate CR +# targets: # Do not remove or uncomment the following scaffold marker; required to generate code for target CRD. 
+# +kubebuilder:scaffold:crdkustomizecainjectionns +# - source: +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: serving-cert +# fieldPath: .metadata.name +# targets: # Do not remove or uncomment the following scaffold marker; required to generate code for target CRD. +# +kubebuilder:scaffold:crdkustomizecainjectionname diff --git a/config/default/manager_metrics_patch.yaml b/config/default/manager_metrics_patch.yaml new file mode 100644 index 000000000..2aaef6536 --- /dev/null +++ b/config/default/manager_metrics_patch.yaml @@ -0,0 +1,4 @@ +# This patch adds the args to allow exposing the metrics endpoint using HTTPS +- op: add + path: /spec/template/spec/containers/0/args/0 + value: --metrics-bind-address=:8443 diff --git a/config/default/metrics_service.yaml b/config/default/metrics_service.yaml new file mode 100644 index 000000000..31e49eaef --- /dev/null +++ b/config/default/metrics_service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + control-plane: controller-manager + app.kubernetes.io/name: production-stack + app.kubernetes.io/managed-by: kustomize + name: controller-manager-metrics-service + namespace: system +spec: + ports: + - name: https + port: 8443 + protocol: TCP + targetPort: 8443 + selector: + control-plane: controller-manager + app.kubernetes.io/name: production-stack diff --git a/config/manager/deployment.yaml b/config/manager/deployment.yaml new file mode 100644 index 000000000..515739858 --- /dev/null +++ b/config/manager/deployment.yaml @@ -0,0 +1,92 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: production-stack-controller-manager + namespace: production-stack-system + labels: + app.kubernetes.io/name: production-stack + app.kubernetes.io/instance: production-stack + app.kubernetes.io/component: manager + app.kubernetes.io/created-by: production-stack + app.kubernetes.io/part-of: production-stack + app.kubernetes.io/managed-by: kustomize +spec: + selector: + matchLabels: + 
app.kubernetes.io/name: production-stack + app.kubernetes.io/instance: production-stack + app.kubernetes.io/component: manager + template: + metadata: + labels: + app.kubernetes.io/name: production-stack + app.kubernetes.io/instance: production-stack + app.kubernetes.io/component: manager + spec: + # TODO(user): Uncomment the following code to configure the nodeAffinity expression + # according to the platforms which are supported by your solution. + # It is considered best practice to support multiple architectures. You can + # build your manager image using the makefile target docker-buildx. + # affinity: + # nodeAffinity: + # requiredDuringSchedulingIgnoredDuringExecution: + # nodeSelectorTerms: + # - matchExpressions: + # - key: kubernetes.io/arch + # operator: In + # values: + # - amd64 + # - arm64 + # - ppc64le + # - s390x + # - key: kubernetes.io/os + # operator: In + # values: + # - linux + securityContext: + # Projects are configured by default to adhere to the "restricted" Pod Security Standards. + # This ensures that deployments meet the highest security requirements for Kubernetes. + # For more details, see: https://kubernetes.io/docs/concepts/security/pod-security-standards/#restricted + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + containers: + - command: + - /manager + args: + - --leader-elect + - --health-probe-bind-address=:8081 + image: controller:latest + imagePullPolicy: Always + name: manager + ports: [] + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "ALL" + livenessProbe: + httpGet: + path: /healthz + port: 8081 + initialDelaySeconds: 15 + periodSeconds: 20 + readinessProbe: + httpGet: + path: /readyz + port: 8081 + initialDelaySeconds: 5 + periodSeconds: 10 + # TODO(user): Configure the resources accordingly based on the project requirements. 
+ # More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + resources: + limits: + cpu: 500m + memory: 128Mi + requests: + cpu: 10m + memory: 64Mi + volumeMounts: [] + volumes: [] + serviceAccountName: production-stack-controller-manager + terminationGracePeriodSeconds: 10 diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml new file mode 100644 index 000000000..f3af0a933 --- /dev/null +++ b/config/manager/kustomization.yaml @@ -0,0 +1,9 @@ +resources: +- namespace.yaml +- deployment.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +images: +- name: controller + newName: controller + newTag: latest diff --git a/config/manager/namespace.yaml b/config/manager/namespace.yaml new file mode 100644 index 000000000..8084f7071 --- /dev/null +++ b/config/manager/namespace.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: Namespace +metadata: + labels: + control-plane: controller-manager + app.kubernetes.io/name: production-stack + app.kubernetes.io/managed-by: kustomize + name: system diff --git a/config/network-policy/allow-metrics-traffic.yaml b/config/network-policy/allow-metrics-traffic.yaml new file mode 100644 index 000000000..034723af1 --- /dev/null +++ b/config/network-policy/allow-metrics-traffic.yaml @@ -0,0 +1,27 @@ +# This NetworkPolicy allows ingress traffic +# with Pods running on namespaces labeled with 'metrics: enabled'. Only Pods on those +# namespaces are able to gather data from the metrics endpoint. 
+apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + labels: + app.kubernetes.io/name: production-stack + app.kubernetes.io/managed-by: kustomize + name: allow-metrics-traffic + namespace: system +spec: + podSelector: + matchLabels: + control-plane: controller-manager + app.kubernetes.io/name: production-stack + policyTypes: + - Ingress + ingress: + # This allows ingress traffic from any namespace with the label metrics: enabled + - from: + - namespaceSelector: + matchLabels: + metrics: enabled # Only from namespaces with this label + ports: + - port: 8443 + protocol: TCP diff --git a/config/network-policy/kustomization.yaml b/config/network-policy/kustomization.yaml new file mode 100644 index 000000000..ec0fb5e57 --- /dev/null +++ b/config/network-policy/kustomization.yaml @@ -0,0 +1,2 @@ +resources: +- allow-metrics-traffic.yaml diff --git a/config/prometheus/kustomization.yaml b/config/prometheus/kustomization.yaml new file mode 100644 index 000000000..fdc5481b1 --- /dev/null +++ b/config/prometheus/kustomization.yaml @@ -0,0 +1,11 @@ +resources: +- monitor.yaml + +# [PROMETHEUS-WITH-CERTS] The following patch configures the ServiceMonitor in ../prometheus +# to securely reference certificates created and managed by cert-manager. +# Additionally, ensure that you uncomment the [METRICS-WITH-CERTS] patch under config/default/kustomization.yaml +# to mount the "metrics-server-cert" secret in the Manager Deployment.
+#patches: +# - path: monitor_tls_patch.yaml +# target: +# kind: ServiceMonitor diff --git a/config/prometheus/monitor.yaml b/config/prometheus/monitor.yaml new file mode 100644 index 000000000..e5272d7b8 --- /dev/null +++ b/config/prometheus/monitor.yaml @@ -0,0 +1,27 @@ +# Prometheus Monitor Service (Metrics) +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + control-plane: controller-manager + app.kubernetes.io/name: production-stack + app.kubernetes.io/managed-by: kustomize + name: controller-manager-metrics-monitor + namespace: system +spec: + endpoints: + - path: /metrics + port: https # Ensure this is the name of the port that exposes HTTPS metrics + scheme: https + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + tlsConfig: + # TODO(user): The option insecureSkipVerify: true is not recommended for production since it disables + # certificate verification, exposing the system to potential man-in-the-middle attacks. + # For production environments, it is recommended to use cert-manager for automatic TLS certificate management. + # To apply this configuration, enable cert-manager and use the patch located at config/prometheus/monitor_tls_patch.yaml, + # which securely references the certificate from the 'metrics-server-cert' secret.
+ insecureSkipVerify: true + selector: + matchLabels: + control-plane: controller-manager + app.kubernetes.io/name: production-stack diff --git a/config/prometheus/monitor_tls_patch.yaml b/config/prometheus/monitor_tls_patch.yaml new file mode 100644 index 000000000..5bf84ce0d --- /dev/null +++ b/config/prometheus/monitor_tls_patch.yaml @@ -0,0 +1,19 @@ +# Patch for Prometheus ServiceMonitor to enable secure TLS configuration +# using certificates managed by cert-manager +- op: replace + path: /spec/endpoints/0/tlsConfig + value: + # SERVICE_NAME and SERVICE_NAMESPACE will be substituted by kustomize + serverName: SERVICE_NAME.SERVICE_NAMESPACE.svc + insecureSkipVerify: false + ca: + secret: + name: metrics-server-cert + key: ca.crt + cert: + secret: + name: metrics-server-cert + key: tls.crt + keySecret: + name: metrics-server-cert + key: tls.key diff --git a/config/rbac/kustomization.yaml b/config/rbac/kustomization.yaml new file mode 100644 index 000000000..7d46d2f99 --- /dev/null +++ b/config/rbac/kustomization.yaml @@ -0,0 +1,30 @@ +resources: +# All RBAC will be applied under this service account in +# the deployment namespace. You may comment out this resource +# if your manager will use a service account that exists at +# runtime. Be sure to update RoleBinding and ClusterRoleBinding +# subjects if changing service account names. +- service_account.yaml +- role.yaml +- role_binding.yaml +- leader_election_role.yaml +- leader_election_role_binding.yaml +# The following RBAC configurations are used to protect +# the metrics endpoint with authn/authz. These configurations +# ensure that only authorized users and service accounts +# can access the metrics endpoint. Comment the following +# permissions if you want to disable this protection. 
+# More info: https://book.kubebuilder.io/reference/metrics.html +- metrics_auth_role.yaml +- metrics_auth_role_binding.yaml +- metrics_reader_role.yaml +# For each CRD, "Admin", "Editor" and "Viewer" roles are scaffolded by +# default, aiding admins in cluster management. Those roles are +# not used by the production-stack project itself. You can comment out the following lines +# if you do not want those helpers to be installed with your Project. +- vllmrouter_admin_role.yaml +- vllmrouter_editor_role.yaml +- vllmrouter_viewer_role.yaml +- vllmruntime_admin_role.yaml +- vllmruntime_editor_role.yaml +- vllmruntime_viewer_role.yaml diff --git a/config/rbac/leader_election_role.yaml b/config/rbac/leader_election_role.yaml new file mode 100644 index 000000000..acffe969c --- /dev/null +++ b/config/rbac/leader_election_role.yaml @@ -0,0 +1,40 @@ +# permissions to do leader election. +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + labels: + app.kubernetes.io/name: production-stack + app.kubernetes.io/managed-by: kustomize + name: leader-election-role +rules: +- apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch diff --git a/config/rbac/leader_election_role_binding.yaml b/config/rbac/leader_election_role_binding.yaml new file mode 100644 index 000000000..ec33def98 --- /dev/null +++ b/config/rbac/leader_election_role_binding.yaml @@ -0,0 +1,15 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + app.kubernetes.io/name: production-stack + app.kubernetes.io/managed-by: kustomize + name: leader-election-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: leader-election-role +subjects: +- kind: ServiceAccount + name:
controller-manager + namespace: system diff --git a/config/rbac/metrics_auth_role.yaml b/config/rbac/metrics_auth_role.yaml new file mode 100644 index 000000000..32d2e4ec6 --- /dev/null +++ b/config/rbac/metrics_auth_role.yaml @@ -0,0 +1,17 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: metrics-auth-role +rules: +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create diff --git a/config/rbac/metrics_auth_role_binding.yaml b/config/rbac/metrics_auth_role_binding.yaml new file mode 100644 index 000000000..e775d67ff --- /dev/null +++ b/config/rbac/metrics_auth_role_binding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: metrics-auth-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: metrics-auth-role +subjects: +- kind: ServiceAccount + name: controller-manager + namespace: system diff --git a/config/rbac/metrics_reader_role.yaml b/config/rbac/metrics_reader_role.yaml new file mode 100644 index 000000000..51a75db47 --- /dev/null +++ b/config/rbac/metrics_reader_role.yaml @@ -0,0 +1,9 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: metrics-reader +rules: +- nonResourceURLs: + - "/metrics" + verbs: + - get diff --git a/config/rbac/pod_viewer_role.yaml b/config/rbac/pod_viewer_role.yaml new file mode 100644 index 000000000..b94a22369 --- /dev/null +++ b/config/rbac/pod_viewer_role.yaml @@ -0,0 +1,17 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: pod-viewer-role + namespace: default + labels: + app.kubernetes.io/name: production-stack + app.kubernetes.io/managed-by: kustomize +rules: +- apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml new file mode 100644 index 
000000000..7109b302c --- /dev/null +++ b/config/rbac/role.yaml @@ -0,0 +1,95 @@ +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: manager-role +rules: +- apiGroups: + - "" + resources: + - configmaps + - secrets + - services + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch +- apiGroups: + - apps + resources: + - deployments + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - production-stack.vllm.ai + resources: + - staticroutes + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - production-stack.vllm.ai + resources: + - staticroutes/finalizers + verbs: + - update +- apiGroups: + - production-stack.vllm.ai + resources: + - staticroutes/status + verbs: + - get + - patch + - update +- apiGroups: + - serving.vllm.ai + resources: + - vllmrouters + - vllmruntimes + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - serving.vllm.ai + resources: + - vllmrouters/finalizers + - vllmruntimes/finalizers + verbs: + - update +- apiGroups: + - serving.vllm.ai + resources: + - vllmrouters/status + - vllmruntimes/status + verbs: + - get + - patch + - update diff --git a/config/rbac/role_binding.yaml b/config/rbac/role_binding.yaml new file mode 100644 index 000000000..b61dbe83f --- /dev/null +++ b/config/rbac/role_binding.yaml @@ -0,0 +1,15 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/name: production-stack + app.kubernetes.io/managed-by: kustomize + name: manager-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: manager-role +subjects: +- kind: ServiceAccount + name: controller-manager + namespace: system diff --git a/config/rbac/service_account.yaml b/config/rbac/service_account.yaml new file mode 
100644 index 000000000..8e3b2f377 --- /dev/null +++ b/config/rbac/service_account.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/name: production-stack + app.kubernetes.io/managed-by: kustomize + name: controller-manager + namespace: system diff --git a/config/rbac/vllmrouter_admin_role.yaml b/config/rbac/vllmrouter_admin_role.yaml new file mode 100644 index 000000000..a42914d6b --- /dev/null +++ b/config/rbac/vllmrouter_admin_role.yaml @@ -0,0 +1,27 @@ +# This rule is not used by the project production-stack itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants full permissions ('*') over serving.vllm.ai. +# This role is intended for users authorized to modify roles and bindings within the cluster, +# enabling them to delegate specific permissions to other users or groups as needed. + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: production-stack + app.kubernetes.io/managed-by: kustomize + name: vllmrouter-admin-role +rules: +- apiGroups: + - serving.vllm.ai + resources: + - vllmrouters + verbs: + - '*' +- apiGroups: + - serving.vllm.ai + resources: + - vllmrouters/status + verbs: + - get diff --git a/config/rbac/vllmrouter_editor_role.yaml b/config/rbac/vllmrouter_editor_role.yaml new file mode 100644 index 000000000..0fbf83a34 --- /dev/null +++ b/config/rbac/vllmrouter_editor_role.yaml @@ -0,0 +1,33 @@ +# This rule is not used by the project production-stack itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants permissions to create, update, and delete resources within the serving.vllm.ai. +# This role is intended for users who need to manage these resources +# but should not control RBAC or manage permissions for others.
+ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: production-stack + app.kubernetes.io/managed-by: kustomize + name: vllmrouter-editor-role +rules: +- apiGroups: + - serving.vllm.ai + resources: + - vllmrouters + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - serving.vllm.ai + resources: + - vllmrouters/status + verbs: + - get diff --git a/config/rbac/vllmrouter_role_binding.yaml b/config/rbac/vllmrouter_role_binding.yaml new file mode 100644 index 000000000..a29c577c8 --- /dev/null +++ b/config/rbac/vllmrouter_role_binding.yaml @@ -0,0 +1,16 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: pod-viewer-binding + namespace: default + labels: + app.kubernetes.io/name: production-stack + app.kubernetes.io/managed-by: kustomize +subjects: +- kind: ServiceAccount + name: vllmrouter-sa + namespace: default +roleRef: + kind: Role + name: pod-viewer-role + apiGroup: rbac.authorization.k8s.io diff --git a/config/rbac/vllmrouter_service_account.yaml b/config/rbac/vllmrouter_service_account.yaml new file mode 100644 index 000000000..4bb14d72b --- /dev/null +++ b/config/rbac/vllmrouter_service_account.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: vllmrouter-sa + namespace: default + labels: + app.kubernetes.io/name: production-stack + app.kubernetes.io/managed-by: kustomize diff --git a/config/rbac/vllmrouter_viewer_role.yaml b/config/rbac/vllmrouter_viewer_role.yaml new file mode 100644 index 000000000..5da0be3d5 --- /dev/null +++ b/config/rbac/vllmrouter_viewer_role.yaml @@ -0,0 +1,29 @@ +# This rule is not used by the project production-stack itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants read-only access to serving.vllm.ai resources. +# This role is intended for users who need visibility into these resources +# without permissions to modify them. 
It is ideal for monitoring purposes and limited-access viewing. + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: production-stack + app.kubernetes.io/managed-by: kustomize + name: vllmrouter-viewer-role +rules: +- apiGroups: + - serving.vllm.ai + resources: + - vllmrouters + verbs: + - get + - list + - watch +- apiGroups: + - serving.vllm.ai + resources: + - vllmrouters/status + verbs: + - get diff --git a/config/rbac/vllmruntime_admin_role.yaml b/config/rbac/vllmruntime_admin_role.yaml new file mode 100644 index 000000000..f765cb599 --- /dev/null +++ b/config/rbac/vllmruntime_admin_role.yaml @@ -0,0 +1,27 @@ +# This rule is not used by the project production-stack itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants full permissions ('*') over serving.vllm.ai. +# This role is intended for users authorized to modify roles and bindings within the cluster, +# enabling them to delegate specific permissions to other users or groups as needed. + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: production-stack + app.kubernetes.io/managed-by: kustomize + name: vllmruntime-admin-role +rules: +- apiGroups: + - serving.vllm.ai + resources: + - vllmruntimes + verbs: + - '*' +- apiGroups: + - serving.vllm.ai + resources: + - vllmruntimes/status + verbs: + - get diff --git a/config/rbac/vllmruntime_editor_role.yaml b/config/rbac/vllmruntime_editor_role.yaml new file mode 100644 index 000000000..c62691af5 --- /dev/null +++ b/config/rbac/vllmruntime_editor_role.yaml @@ -0,0 +1,33 @@ +# This rule is not used by the project production-stack itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants permissions to create, update, and delete resources within the serving.vllm.ai API group. 
+# This role is intended for users who need to manage these resources +# but should not control RBAC or manage permissions for others. + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: production-stack + app.kubernetes.io/managed-by: kustomize + name: vllmruntime-editor-role +rules: +- apiGroups: + - serving.vllm.ai + resources: + - vllmruntimes + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - serving.vllm.ai + resources: + - vllmruntimes/status + verbs: + - get diff --git a/config/rbac/vllmruntime_viewer_role.yaml b/config/rbac/vllmruntime_viewer_role.yaml new file mode 100644 index 000000000..1314e4715 --- /dev/null +++ b/config/rbac/vllmruntime_viewer_role.yaml @@ -0,0 +1,29 @@ +# This rule is not used by the project production-stack itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants read-only access to serving.vllm.ai resources. +# This role is intended for users who need visibility into these resources +# without permissions to modify them. It is ideal for monitoring purposes and limited-access viewing. 
+ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: production-stack + app.kubernetes.io/managed-by: kustomize + name: vllmruntime-viewer-role +rules: +- apiGroups: + - serving.vllm.ai + resources: + - vllmruntimes + verbs: + - get + - list + - watch +- apiGroups: + - serving.vllm.ai + resources: + - vllmruntimes/status + verbs: + - get diff --git a/config/samples/kustomization.yaml b/config/samples/kustomization.yaml new file mode 100644 index 000000000..8a0f43239 --- /dev/null +++ b/config/samples/kustomization.yaml @@ -0,0 +1,8 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +## Append samples of your project ## +resources: +- serving_v1alpha1_vllmruntime.yaml +- serving_v1alpha1_router.yaml +# +kubebuilder:scaffold:manifestskustomizesamples diff --git a/config/samples/serving_v1alpha1_router.yaml b/config/samples/serving_v1alpha1_router.yaml new file mode 100644 index 000000000..3400dace4 --- /dev/null +++ b/config/samples/serving_v1alpha1_router.yaml @@ -0,0 +1,57 @@ +apiVersion: serving.vllm.ai/v1alpha1 +kind: VLLMRouter +metadata: + labels: + app.kubernetes.io/name: production-stack + app.kubernetes.io/managed-by: kustomize + name: vllmrouter-sample +spec: + # Enable the router deployment + enableRouter: true + + # Number of router replicas + replicas: 1 + + # Service discovery method (k8s or static) + serviceDiscovery: k8s + + # Routing strategy (roundrobin or session) + routingLogic: roundrobin + + # Engine statistics collection interval + engineScrapeInterval: "30" + + # Request statistics window + requestStatsWindow: "60" + + # Container port for the router service + port: 80 + + # Service account name + serviceAccountName: vllmrouter-sa + + # Image configuration + image: + registry: docker.io + name: lmcache/lmstack-router + pullPolicy: IfNotPresent + + # Resource requirements + resources: + cpu: "2" + memory: "8Gi" + + # Environment variables + env: + - name: LOG_LEVEL + 
value: "info" + - name: METRICS_ENABLED + value: "true" + + # Node selector for pod scheduling + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/os + operator: In + values: + - linux diff --git a/config/samples/serving_v1alpha1_vllmruntime.yaml b/config/samples/serving_v1alpha1_vllmruntime.yaml new file mode 100644 index 000000000..3730c482f --- /dev/null +++ b/config/samples/serving_v1alpha1_vllmruntime.yaml @@ -0,0 +1,63 @@ +apiVersion: serving.vllm.ai/v1alpha1 +kind: VLLMRuntime +metadata: + labels: + app.kubernetes.io/name: production-stack + app.kubernetes.io/managed-by: kustomize + name: vllmruntime-sample +spec: + + # vLLM specific configurations + enableChunkedPrefill: false + enablePrefixCaching: false + tensorParallelSize: 1 + gpuMemoryUtilization: "0.8" + maxLoras: 4 + extraArgs: ["--disable-log-requests"] + v1: false + + # LM Cache configuration + lmCacheConfig: + enabled: true + cpuOffloadingBufferSize: "4Gi" + diskOffloadingBufferSize: "8Gi" + remoteUrl: "" + remoteSerde: "" + + # Model configuration + model: + modelURL: "meta-llama/Llama-3.1-8B" + enableLoRA: false + enableTool: false + toolCallParser: "" + maxModelLen: 4096 + dtype: "bfloat16" + maxNumSeqs: 32 + + # Environment variables + env: + - name: HF_HOME + value: "/data" + + # Resource requirements + resources: + cpu: "10" + memory: "32Gi" + gpu: "1" + + # Image configuration + image: + registry: "docker.io" + name: "lmcache/vllm-openai:2025-04-18" + pullPolicy: "IfNotPresent" + pullSecretName: "" + + # HuggingFace token secret (optional) + hfTokenSecret: + name: "huggingface-token" + + # Number of replicas + replicas: 1 + + # Deployment strategy + deploymentStrategy: "Recreate" diff --git a/go.mod b/go.mod new file mode 100644 index 000000000..be7288dec --- /dev/null +++ b/go.mod @@ -0,0 +1,100 @@ +module production-stack + +go 1.24.0 + +toolchain go1.24.2 + +require ( + github.com/onsi/ginkgo/v2 v2.23.4 + github.com/onsi/gomega v1.37.0 + k8s.io/apimachinery v0.33.0 + 
k8s.io/client-go v0.33.0 + sigs.k8s.io/controller-runtime v0.20.4 +) + +require ( + cel.dev/expr v0.18.0 // indirect + github.com/antlr4-go/antlr/v4 v4.13.0 // indirect + github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a // indirect + github.com/beorn7/perks v1.0.1 // indirect + github.com/blang/semver/v4 v4.0.0 // indirect + github.com/cenkalti/backoff/v4 v4.3.0 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect + github.com/emicklei/go-restful/v3 v3.11.0 // indirect + github.com/evanphx/json-patch/v5 v5.9.11 // indirect + github.com/felixge/httpsnoop v1.0.4 // indirect + github.com/fsnotify/fsnotify v1.7.0 // indirect + github.com/fxamacker/cbor/v2 v2.7.0 // indirect + github.com/go-logr/logr v1.4.2 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/go-logr/zapr v1.3.0 // indirect + github.com/go-openapi/jsonpointer v0.21.0 // indirect + github.com/go-openapi/jsonreference v0.20.2 // indirect + github.com/go-openapi/swag v0.23.0 // indirect + github.com/go-task/slim-sprig/v3 v3.0.0 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/google/btree v1.1.3 // indirect + github.com/google/cel-go v0.22.0 // indirect + github.com/google/gnostic-models v0.6.9 // indirect + github.com/google/go-cmp v0.7.0 // indirect + github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/mailru/easyjson v0.7.7 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/pkg/errors 
v0.9.1 // indirect + github.com/prometheus/client_golang v1.19.1 // indirect + github.com/prometheus/client_model v0.6.1 // indirect + github.com/prometheus/common v0.55.0 // indirect + github.com/prometheus/procfs v0.15.1 // indirect + github.com/spf13/cobra v1.8.1 // indirect + github.com/spf13/pflag v1.0.5 // indirect + github.com/stoewer/go-strcase v1.3.0 // indirect + github.com/x448/float16 v0.8.4 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 // indirect + go.opentelemetry.io/otel v1.28.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.28.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.27.0 // indirect + go.opentelemetry.io/otel/metric v1.28.0 // indirect + go.opentelemetry.io/otel/sdk v1.28.0 // indirect + go.opentelemetry.io/otel/trace v1.28.0 // indirect + go.opentelemetry.io/proto/otlp v1.3.1 // indirect + go.uber.org/automaxprocs v1.6.0 // indirect + go.uber.org/multierr v1.11.0 // indirect + go.uber.org/zap v1.27.0 // indirect + golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect + golang.org/x/net v0.38.0 // indirect + golang.org/x/oauth2 v0.27.0 // indirect + golang.org/x/sync v0.12.0 // indirect + golang.org/x/sys v0.32.0 // indirect + golang.org/x/term v0.30.0 // indirect + golang.org/x/text v0.23.0 // indirect + golang.org/x/time v0.9.0 // indirect + golang.org/x/tools v0.31.0 // indirect + gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7 // indirect + google.golang.org/grpc v1.65.0 // indirect + google.golang.org/protobuf v1.36.5 // indirect + gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect + k8s.io/api v0.33.0 // indirect + k8s.io/apiextensions-apiserver v0.32.1 // indirect + k8s.io/apiserver v0.32.1 // 
indirect + k8s.io/component-base v0.32.1 // indirect + k8s.io/klog/v2 v2.130.1 // indirect + k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff // indirect + k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 // indirect + sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.0 // indirect + sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect + sigs.k8s.io/randfill v1.0.0 // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.6.0 // indirect + sigs.k8s.io/yaml v1.4.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 000000000..d4a9595dd --- /dev/null +++ b/go.sum @@ -0,0 +1,254 @@ +cel.dev/expr v0.18.0 h1:CJ6drgk+Hf96lkLikr4rFf19WrU0BOWEihyZnI2TAzo= +cel.dev/expr v0.18.0/go.mod h1:MrpN08Q+lEBs+bGYdLxxHkZoUSsCp0nSKTs0nTymJgw= +github.com/antlr4-go/antlr/v4 v4.13.0 h1:lxCg3LAv+EUK6t1i0y1V6/SLeUi0eKEKdhQAlS8TVTI= +github.com/antlr4-go/antlr/v4 v4.13.0/go.mod h1:pfChB/xh/Unjila75QW7+VU4TSnWnnk9UTnmpPaOR2g= +github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a h1:idn718Q4B6AGu/h5Sxe66HYVdqdGu2l9Iebqhi/AEoA= +github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= +github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= +github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= +github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= 
+github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= +github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/evanphx/json-patch v0.5.2 h1:xVCHIVMUu1wtM/VkR9jVZ45N3FhZfYMMYGorLCR8P3k= +github.com/evanphx/json-patch v0.5.2/go.mod h1:ZWS5hhDbVDyob71nXKNL0+PWn6ToqBHMikGIFbs31qQ= +github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU= +github.com/evanphx/json-patch/v5 v5.9.11/go.mod h1:3j+LviiESTElxA4p3EMKAB9HXj3/XEtnUf6OZxqIQTM= +github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= +github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= +github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= +github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= +github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod 
h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= +github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= +github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= +github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= +github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= +github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= +github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= +github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= +github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg= +github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= +github.com/google/cel-go v0.22.0 h1:b3FJZxpiv1vTMo2/5RDUqAHPxkT8mmMfJIrq1llbf7g= +github.com/google/cel-go v0.22.0/go.mod h1:BuznPXXfQDpXKWQ9sPW3TzlAJN5zzFe+i9tIs0yC4s8= +github.com/google/gnostic-models v0.6.9 h1:MU/8wDLif2qCXZmzncUQ/BOfxWfthHi63KqpoNbWqVw= +github.com/google/gnostic-models v0.6.9/go.mod h1:CiWsm0s6BSQd1hRn8/QmxqB6BesYcbSZxsz9b0KuDBw= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.7.0 
h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8= +github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 h1:bkypFPDjIYGfCYD5mRBvpqxfYX1YCS1PXdKYWi8FsN0= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0/go.mod h1:P+Lt/0by1T8bfcF3z737NnSbmxQAppXMRziHUxPOC8k= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod 
h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/onsi/ginkgo/v2 v2.23.4 h1:ktYTpKJAVZnDT4VjxSbiBenUjmlL/5QkBEocaWXiQus= +github.com/onsi/ginkgo/v2 v2.23.4/go.mod h1:Bt66ApGPBFzHyR+JO10Zbt0Gsp4uWxu5mIOTusL46e8= +github.com/onsi/gomega v1.37.0 h1:CdEG8g0S133B4OswTDC/5XPSzE1OeP29QOioj2PID2Y= +github.com/onsi/gomega v1.37.0/go.mod h1:8D9+Txp43QWKhM24yyOBEdpkzN8FvJyAwecBgsU4KU0= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib 
v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g= +github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U= +github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE= +github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho= +github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= +github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= +github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc= +github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8= +github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= +github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= +github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= +github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stoewer/go-strcase v1.3.0 h1:g0eASXYtp+yvN9fK8sH94oCIk0fau9uV1/ZdJ0AVEzs= +github.com/stoewer/go-strcase v1.3.0/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8wodgtPmh1xo= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= 
+github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 h1:4K4tsIXefpVJtvA/8srF4V4y0akAoPHkIslgAkjixJA= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0/go.mod h1:jjdQuTGVsXV4vSs+CJ2qYDeDPf9yIJV23qlIzBm73Vg= +go.opentelemetry.io/otel v1.28.0 h1:/SqNcYk+idO0CxKEUOtKQClMK/MimZihKYMruSMViUo= +go.opentelemetry.io/otel v1.28.0/go.mod h1:q68ijF8Fc8CnMHKyzqL6akLO46ePnjkgfIMIjUIX9z4= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.28.0 h1:3Q/xZUyC1BBkualc9ROb4G8qkH90LXEIICcs5zv1OYY= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.28.0/go.mod h1:s75jGIWA9OfCMzF0xr+ZgfrB5FEbbV7UuYo32ahUiFI= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.27.0 h1:qFffATk0X+HD+f1Z8lswGiOQYKHRlzfmdJm0wEaVrFA= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.27.0/go.mod 
h1:MOiCmryaYtc+V0Ei+Tx9o5S1ZjA7kzLucuVuyzBZloQ= +go.opentelemetry.io/otel/metric v1.28.0 h1:f0HGvSl1KRAU1DLgLGFjrwVyismPlnuU6JD6bOeuA5Q= +go.opentelemetry.io/otel/metric v1.28.0/go.mod h1:Fb1eVBFZmLVTMb6PPohq3TO9IIhUisDsbJoL/+uQW4s= +go.opentelemetry.io/otel/sdk v1.28.0 h1:b9d7hIry8yZsgtbmM0DKyPWMMUMlK9NEKuIG4aBqWyE= +go.opentelemetry.io/otel/sdk v1.28.0/go.mod h1:oYj7ClPUA7Iw3m+r7GeEjz0qckQRJK2B8zjcZEfu7Pg= +go.opentelemetry.io/otel/trace v1.28.0 h1:GhQ9cUuQGmNDd5BTCP2dAvv75RdMxEfTmYejp+lkx9g= +go.opentelemetry.io/otel/trace v1.28.0/go.mod h1:jPyXzNPg6da9+38HEwElrQiHlVMTnVfM3/yv2OlIHaI= +go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0= +go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8= +go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs= +go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= +go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8= +golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY= +golang.org/x/mod v0.2.0/go.mod 
h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8= +golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= +golang.org/x/oauth2 v0.27.0 h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M= +golang.org/x/oauth2 v0.27.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw= +golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20= +golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/term v0.30.0 h1:PQ39fJZ+mfadBm0y5WlL4vlM7Sx1Hgf13sMIY2+QS9Y= +golang.org/x/term v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g= 
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY= +golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4= +golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY= +golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.31.0 h1:0EedkvKDbh+qistFTd0Bcwe/YLh4vHwWEkiI0toFIBU= +golang.org/x/tools v0.31.0/go.mod h1:naFTU+Cev749tSJRXJlna0T3WxKvb1kWEx15xA4SdmQ= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= +gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= +google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7 h1:YcyjlL1PRr2Q17/I0dPk2JmYS5CDXfcdb2Z3YRioEbw= +google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7/go.mod h1:OCdP9MfskevB/rbYvHTsXTtKC+3bHWajPdoKgjcYkfo= +google.golang.org/genproto/googleapis/rpc 
v0.0.0-20240826202546-f6391c0de4c7 h1:2035KHhUv+EpyB+hWgJnaWKJOdX1E95w2S8Rr4uWKTs= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU= +google.golang.org/grpc v1.65.0 h1:bs/cUb4lp1G5iImFFd3u5ixQzweKizoZJAwBNLR42lc= +google.golang.org/grpc v1.65.0/go.mod h1:WgYC2ypjlB0EiQi6wdKixMqukr6lBc0Vo+oOgjrM5ZQ= +google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM= +google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4= +gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +k8s.io/api v0.33.0 h1:yTgZVn1XEe6opVpP1FylmNrIFWuDqe2H0V8CT5gxfIU= +k8s.io/api v0.33.0/go.mod h1:CTO61ECK/KU7haa3qq8sarQ0biLq2ju405IZAd9zsiM= +k8s.io/apiextensions-apiserver v0.32.1 h1:hjkALhRUeCariC8DiVmb5jj0VjIc1N0DREP32+6UXZw= +k8s.io/apiextensions-apiserver v0.32.1/go.mod h1:sxWIGuGiYov7Io1fAS2X06NjMIk5CbRHc2StSmbaQto= +k8s.io/apimachinery v0.33.0 h1:1a6kHrJxb2hs4t8EE5wuR/WxKDwGN1FKH3JvDtA0CIQ= +k8s.io/apimachinery v0.33.0/go.mod h1:BHW0YOu7n22fFv/JkYOEfkUYNRN0fj0BlvMFWA7b+SM= +k8s.io/apiserver v0.32.1 
h1:oo0OozRos66WFq87Zc5tclUX2r0mymoVHRq8JmR7Aak= +k8s.io/apiserver v0.32.1/go.mod h1:UcB9tWjBY7aryeI5zAgzVJB/6k7E97bkr1RgqDz0jPw= +k8s.io/client-go v0.33.0 h1:UASR0sAYVUzs2kYuKn/ZakZlcs2bEHaizrrHUZg0G98= +k8s.io/client-go v0.33.0/go.mod h1:kGkd+l/gNGg8GYWAPr0xF1rRKvVWvzh9vmZAMXtaKOg= +k8s.io/component-base v0.32.1 h1:/5IfJ0dHIKBWysGV0yKTFfacZ5yNV1sulPh3ilJjRZk= +k8s.io/component-base v0.32.1/go.mod h1:j1iMMHi/sqAHeG5z+O9BFNCF698a1u0186zkjMZQ28w= +k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= +k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= +k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff h1:/usPimJzUKKu+m+TE36gUyGcf03XZEP0ZIKgKj35LS4= +k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff/go.mod h1:5jIi+8yX4RIb8wk3XwBo5Pq2ccx4FP10ohkbSKCZoK8= +k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 h1:M3sRQVHv7vB20Xc2ybTt7ODCeFj6JSWYFzOFnYeS6Ro= +k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.0 h1:CPT0ExVicCzcpeN4baWEV2ko2Z/AsiZgEdwgcfwLgMo= +sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.0/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw= +sigs.k8s.io/controller-runtime v0.20.4 h1:X3c+Odnxz+iPTRobG4tp092+CvBU9UK0t/bRf+n0DGU= +sigs.k8s.io/controller-runtime v0.20.4/go.mod h1:xg2XB0K5ShQzAgsoujxuKN4LNXR2LfwwHsPj7Iaw+XY= +sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 h1:/Rv+M11QRah1itp8VhT6HoVx1Ray9eB4DBr+K+/sCJ8= +sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3/go.mod h1:18nIHnGi6636UCz6m8i4DhaJ65T6EruyzmoQqI2BVDo= +sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= +sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= +sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= +sigs.k8s.io/structured-merge-diff/v4 v4.6.0 
h1:IUA9nvMmnKWcj5jl84xn+T5MnlZKThmUW1TdblaLVAc= +sigs.k8s.io/structured-merge-diff/v4 v4.6.0/go.mod h1:dDy58f92j70zLsuZVuUX5Wp9vtxXpaZnkPGWeqDfCps= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/hack/boilerplate.go.txt b/hack/boilerplate.go.txt new file mode 100644 index 000000000..4671de8fb --- /dev/null +++ b/hack/boilerplate.go.txt @@ -0,0 +1,15 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go new file mode 100644 index 000000000..13578d9d9 --- /dev/null +++ b/internal/controller/suite_test.go @@ -0,0 +1,88 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "fmt" + "path/filepath" + "runtime" + "testing" + + . "github.com/onsi/ginkgo/v2" + . 
// These tests use Ginkgo (BDD-style Go testing framework). Refer to
// http://onsi.github.io/ginkgo/ to learn more about Ginkgo.

// Suite-wide state: all three variables are assigned in BeforeSuite, and
// testEnv is stopped again in AfterSuite.
var cfg *rest.Config
var k8sClient client.Client
var testEnv *envtest.Environment

// TestControllers is the `go test` entry point for this package. It registers
// Ginkgo's Fail as the Gomega failure handler and then runs the registered
// specs under the suite name "Controller Suite".
func TestControllers(t *testing.T) {
	RegisterFailHandler(Fail)

	RunSpecs(t, "Controller Suite")
}

// Suite setup: starts an envtest environment loaded with the CRDs from
// config/crd/bases, registers the serving v1alpha1 types in the shared client-go
// scheme, and builds the client stored in k8sClient.
var _ = BeforeSuite(func() {
	// Send controller-runtime logs to the Ginkgo writer, in development mode.
	logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true)))

	By("bootstrapping test environment")
	testEnv = &envtest.Environment{
		CRDDirectoryPaths:     []string{filepath.Join("..", "..", "config", "crd", "bases")},
		ErrorIfCRDPathMissing: true,

		// The BinaryAssetsDirectory is only required if you want to run the tests directly,
		// without calling the makefile target test. If not set, the default path defined in
		// controller-runtime (/usr/local/kubebuilder/) is used.
		// Note that the required binaries must be set up under the bin directory to run the tests directly.
		// NOTE(review): the "1.29.0" directory name is hard-coded here — confirm it matches
		// the envtest binaries the Makefile actually downloads.
		BinaryAssetsDirectory: filepath.Join("..", "..", "bin", "k8s",
			fmt.Sprintf("1.29.0-%s-%s", runtime.GOOS, runtime.GOARCH)),
	}

	var err error
	// cfg is defined in this file globally.
	cfg, err = testEnv.Start()
	Expect(err).NotTo(HaveOccurred())
	Expect(cfg).NotTo(BeNil())

	// Register the VLLMRouter/VLLMRuntime API types so the client below can handle them.
	err = servingv1alpha1.AddToScheme(scheme.Scheme)
	Expect(err).NotTo(HaveOccurred())

	// +kubebuilder:scaffold:scheme

	k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme})
	Expect(err).NotTo(HaveOccurred())
	Expect(k8sClient).NotTo(BeNil())
})

// Suite teardown: stops the envtest environment started in BeforeSuite.
var _ = AfterSuite(func() {
	By("tearing down the test environment")
	err := testEnv.Stop()
	Expect(err).NotTo(HaveOccurred())
})
+*/ + +package controller + +import ( + "context" + "fmt" + "reflect" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" + + servingv1alpha1 "production-stack/api/v1alpha1" +) + +// VLLMRouterReconciler reconciles a VLLMRouter object +type VLLMRouterReconciler struct { + client.Client + Scheme *runtime.Scheme +} + +// +kubebuilder:rbac:groups=serving.vllm.ai,resources=vllmrouters,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=serving.vllm.ai,resources=vllmrouters/status,verbs=get;update;patch +// +kubebuilder:rbac:groups=serving.vllm.ai,resources=vllmrouters/finalizers,verbs=update +// +kubebuilder:rbac:groups=serving.vllm.ai,resources=vllmruntimes,verbs=get;list;watch +// +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=core,resources=services,verbs=get;list;watch;create;update;patch;delete + +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the cluster closer to the desired state. 
+func (r *VLLMRouterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + log := log.FromContext(ctx) + + // Fetch the VLLMRouter instance + router := &servingv1alpha1.VLLMRouter{} + err := r.Get(ctx, req.NamespacedName, router) + if err != nil { + if errors.IsNotFound(err) { + // Request object not found, could have been deleted after reconcile request. + // Return and don't requeue + log.Info("VLLMRouter resource not found. Ignoring since object must be deleted") + return ctrl.Result{}, nil + } + // Error reading the object - requeue the request. + log.Error(err, "Failed to get VLLMRouter") + return ctrl.Result{}, err + } + + // Check if the service already exists, if not create a new one + foundService := &corev1.Service{} + err = r.Get(ctx, types.NamespacedName{Name: router.Name, Namespace: router.Namespace}, foundService) + if err != nil && errors.IsNotFound(err) { + // Define a new service + svc := r.serviceForVLLMRouter(router) + log.Info("Creating a new Service", "Service.Namespace", svc.Namespace, "Service.Name", svc.Name) + err = r.Create(ctx, svc) + if err != nil { + log.Error(err, "Failed to create new Service", "Service.Namespace", svc.Namespace, "Service.Name", svc.Name) + return ctrl.Result{}, err + } + // Service created successfully - return and requeue + return ctrl.Result{Requeue: true}, nil + } else if err != nil { + log.Error(err, "Failed to get Service") + return ctrl.Result{}, err + } + + // Check if the deployment already exists, if not create a new one + found := &appsv1.Deployment{} + err = r.Get(ctx, types.NamespacedName{Name: router.Name, Namespace: router.Namespace}, found) + if err != nil && errors.IsNotFound(err) { + // Define a new deployment + dep := r.deploymentForVLLMRouter(router) + log.Info("Creating a new Deployment", "Deployment.Namespace", dep.Namespace, "Deployment.Name", dep.Name) + err = r.Create(ctx, dep) + if err != nil { + log.Error(err, "Failed to create new Deployment", 
"Deployment.Namespace", dep.Namespace, "Deployment.Name", dep.Name) + return ctrl.Result{}, err + } + // Deployment created successfully - return and requeue + return ctrl.Result{Requeue: true}, nil + } else if err != nil { + log.Error(err, "Failed to get Deployment") + return ctrl.Result{}, err + } + + // Update the deployment if needed + if r.deploymentNeedsUpdate(found, router) { + log.Info("Updating Deployment", "Deployment.Namespace", found.Namespace, "Deployment.Name", found.Name) + // Create new deployment spec + newDep := r.deploymentForVLLMRouter(router) + + err = r.Update(ctx, newDep) + if err != nil { + log.Error(err, "Failed to update Deployment", "Deployment.Namespace", found.Namespace, "Deployment.Name", found.Name) + return ctrl.Result{}, err + } + // Deployment updated successfully - return and requeue + return ctrl.Result{Requeue: true}, nil + } + + // Update the status + if err := r.updateStatus(ctx, router, found); err != nil { + log.Error(err, "Failed to update VLLMRouter status") + return ctrl.Result{}, err + } + + return ctrl.Result{}, nil +} + +// deploymentForVLLMRouter returns a VLLMRouter Deployment object +func (r *VLLMRouterReconciler) deploymentForVLLMRouter(router *servingv1alpha1.VLLMRouter) *appsv1.Deployment { + labels := map[string]string{ + "app": router.Name, + } + + // Add user-defined environment variables + env := []corev1.EnvVar{} + if router.Spec.Env != nil { + for _, e := range router.Spec.Env { + env = append(env, corev1.EnvVar{ + Name: e.Name, + Value: e.Value, + }) + } + } + + // Add VLLM API Key if specified + if router.Spec.VLLMApiKeySecret.Name != "" && router.Spec.VLLMApiKeyName != "" { + env = append(env, corev1.EnvVar{ + Name: "VLLM_API_KEY", + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: router.Spec.VLLMApiKeySecret, + Key: router.Spec.VLLMApiKeyName, + }, + }, + }) + } + + // Build resource requirements + resources := corev1.ResourceRequirements{ + Requests: 
corev1.ResourceList{}, + Limits: corev1.ResourceList{}, + } + + if router.Spec.Resources.CPU != "" { + resources.Requests[corev1.ResourceCPU] = resource.MustParse(router.Spec.Resources.CPU) + resources.Limits[corev1.ResourceCPU] = resource.MustParse(router.Spec.Resources.CPU) + } + + if router.Spec.Resources.Memory != "" { + resources.Requests[corev1.ResourceMemory] = resource.MustParse(router.Spec.Resources.Memory) + resources.Limits[corev1.ResourceMemory] = resource.MustParse(router.Spec.Resources.Memory) + } + + // Get the image from Image spec or use default + image := router.Spec.Image.Registry + "/" + router.Spec.Image.Name + + // Get the image pull policy + imagePullPolicy := corev1.PullIfNotPresent + if router.Spec.Image.PullPolicy != "" { + imagePullPolicy = corev1.PullPolicy(router.Spec.Image.PullPolicy) + } + + // Build image pull secrets + var imagePullSecrets []corev1.LocalObjectReference + if router.Spec.Image.PullSecretName != "" { + imagePullSecrets = append(imagePullSecrets, corev1.LocalObjectReference{ + Name: router.Spec.Image.PullSecretName, + }) + } + + // Build container args + args := []string{ + "--host", "0.0.0.0", + "--port", fmt.Sprintf("%d", router.Spec.Port), + "--service-discovery", router.Spec.ServiceDiscovery, + } + + // Add service discovery specific args + if router.Spec.ServiceDiscovery == "k8s" { + args = append(args, + "--k8s-namespace", router.Namespace, + ) + } else if router.Spec.ServiceDiscovery == "static" { + if router.Spec.StaticBackends == "" || router.Spec.StaticModels == "" { + // This should be handled by validation webhook + panic("static service discovery requires both staticBackends and staticModels") + } + args = append(args, + "--static-backends", router.Spec.StaticBackends, + "--static-models", router.Spec.StaticModels, + ) + } + + // Add optional args + if router.Spec.RoutingLogic != "" { + args = append(args, "--routing-logic", router.Spec.RoutingLogic) + } + if router.Spec.SessionKey != "" { + args = 
append(args, "--session-key", router.Spec.SessionKey) + } + if router.Spec.EngineScrapeInterval != "" { + args = append(args, "--engine-stats-interval", router.Spec.EngineScrapeInterval) + } + if router.Spec.RequestStatsWindow != "" { + args = append(args, "--request-stats-window", router.Spec.RequestStatsWindow) + } + if router.Spec.ExtraArgs != nil { + args = append(args, router.Spec.ExtraArgs...) + } + + dep := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: router.Name, + Namespace: router.Namespace, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: &router.Spec.Replicas, + Selector: &metav1.LabelSelector{ + MatchLabels: labels, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: labels, + }, + Spec: corev1.PodSpec{ + ServiceAccountName: router.Spec.ServiceAccountName, + ImagePullSecrets: imagePullSecrets, + Containers: []corev1.Container{ + { + Name: "router", + Image: image, + ImagePullPolicy: imagePullPolicy, + Args: args, + Env: env, + Ports: []corev1.ContainerPort{ + { + Name: "http", + ContainerPort: router.Spec.Port, + }, + }, + Resources: resources, + LivenessProbe: &corev1.Probe{ + InitialDelaySeconds: 30, + PeriodSeconds: 5, + FailureThreshold: 3, + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/health", + Port: intstr.FromInt(int(router.Spec.Port)), + }, + }, + }, + }, + }, + }, + }, + }, + } + + // Add node affinity if specified + if router.Spec.NodeSelectorTerms != nil { + dep.Spec.Template.Spec.Affinity = &corev1.Affinity{ + NodeAffinity: &corev1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{ + NodeSelectorTerms: router.Spec.NodeSelectorTerms, + }, + }, + } + } + + // Set the owner reference + ctrl.SetControllerReference(router, dep, r.Scheme) + return dep +} + +// deploymentNeedsUpdate checks if the deployment needs to be updated +func (r *VLLMRouterReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, router 
*servingv1alpha1.VLLMRouter) bool { + // Generate the expected deployment + expectedDep := r.deploymentForVLLMRouter(router) + + // Compare image + if expectedDep.Spec.Template.Spec.Containers[0].Image != dep.Spec.Template.Spec.Containers[0].Image { + return true + } + + // Compare resources + expectedResources := expectedDep.Spec.Template.Spec.Containers[0].Resources + actualResources := dep.Spec.Template.Spec.Containers[0].Resources + if !reflect.DeepEqual(expectedResources, actualResources) { + return true + } + + return false +} + +// updateStatus updates the status of the VLLMRouter +func (r *VLLMRouterReconciler) updateStatus(ctx context.Context, router *servingv1alpha1.VLLMRouter, dep *appsv1.Deployment) error { + // Re-read the VLLMRouter to get the latest version + latestRouter := &servingv1alpha1.VLLMRouter{} + if err := r.Get(ctx, types.NamespacedName{Name: router.Name, Namespace: router.Namespace}, latestRouter); err != nil { + return err + } + + latestRouter.Status.LastUpdated = metav1.Now() + + // Update VLLMRouter status based on deployment status + if dep.Status.AvailableReplicas > 0 { + latestRouter.Status.Status = "Ready" + } else if dep.Status.UpdatedReplicas > 0 { + latestRouter.Status.Status = "Updating" + } else { + latestRouter.Status.Status = "NotReady" + } + + return r.Status().Update(ctx, latestRouter) +} + +// serviceForVLLMRouter returns a VLLMRouter Service object +func (r *VLLMRouterReconciler) serviceForVLLMRouter(router *servingv1alpha1.VLLMRouter) *corev1.Service { + labels := map[string]string{ + "app": router.Name, + } + + svc := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: router.Name, + Namespace: router.Namespace, + }, + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeClusterIP, + Selector: labels, + Ports: []corev1.ServicePort{ + { + Name: "http", + Port: 80, + TargetPort: intstr.FromInt(int(router.Spec.Port)), + Protocol: corev1.ProtocolTCP, + }, + }, + }, + } + + // Set the owner reference + 
ctrl.SetControllerReference(router, svc, r.Scheme) + return svc +} + +// SetupWithManager sets up the controller with the Manager. +func (r *VLLMRouterReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + For(&servingv1alpha1.VLLMRouter{}). + Owns(&appsv1.Deployment{}). + Owns(&corev1.Service{}). + Complete(r) +} diff --git a/internal/controller/vllmrouter_controller_test.go b/internal/controller/vllmrouter_controller_test.go new file mode 100644 index 000000000..fe68a9f5c --- /dev/null +++ b/internal/controller/vllmrouter_controller_test.go @@ -0,0 +1,84 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "context" + + . "github.com/onsi/ginkgo/v2" + . 
// Spec for the VLLMRouter controller: creates a VLLMRouter named
// "test-resource" in the "default" namespace before each test, runs a single
// Reconcile against it, and deletes it afterwards.
var _ = Describe("VLLMRouter Controller", func() {
	Context("When reconciling a resource", func() {
		const resourceName = "test-resource"

		ctx := context.Background()

		// Key used to create, fetch, reconcile and delete the resource.
		typeNamespacedName := types.NamespacedName{
			Name:      resourceName,
			Namespace: "default", // TODO(user):Modify as needed
		}
		router := &servingv1alpha1.VLLMRouter{}

		BeforeEach(func() {
			By("creating the custom resource for the Kind VLLMRouter")
			// Create the resource only when it does not exist yet. Its spec is
			// left empty, so the reconciler sees zero values for every field.
			err := k8sClient.Get(ctx, typeNamespacedName, router)
			if err != nil && errors.IsNotFound(err) {
				resource := &servingv1alpha1.VLLMRouter{
					ObjectMeta: metav1.ObjectMeta{
						Name:      resourceName,
						Namespace: "default",
					},
					// TODO(user): Specify other spec details if needed.
				}
				Expect(k8sClient.Create(ctx, resource)).To(Succeed())
			}
		})

		AfterEach(func() {
			// TODO(user): Cleanup logic after each test, like removing the resource instance.
			resource := &servingv1alpha1.VLLMRouter{}
			err := k8sClient.Get(ctx, typeNamespacedName, resource)
			Expect(err).NotTo(HaveOccurred())

			By("Cleanup the specific resource instance VLLMRouter")
			Expect(k8sClient.Delete(ctx, resource)).To(Succeed())
		})
		It("should successfully reconcile the resource", func() {
			By("Reconciling the created resource")
			// The reconciler is built directly on the suite client; no manager is started.
			controllerReconciler := &VLLMRouterReconciler{
				Client: k8sClient,
				Scheme: k8sClient.Scheme(),
			}

			// Only the absence of an error is asserted; the returned Result is ignored.
			_, err := controllerReconciler.Reconcile(ctx, reconcile.Request{
				NamespacedName: typeNamespacedName,
			})
			Expect(err).NotTo(HaveOccurred())
			// TODO(user): Add more specific assertions depending on your controller's reconciliation logic.
			// Example: If you expect a certain status condition after reconciliation, verify it here.
		})
	})
})
+kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=core,resources=services,verbs=get;list;watch;create;update;patch;delete + +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the cluster closer to the desired state. +func (r *VLLMRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + log := log.FromContext(ctx) + + // Fetch the VLLMRuntime instance + vllmRuntime := &servingv1alpha1.VLLMRuntime{} + err := r.Get(ctx, req.NamespacedName, vllmRuntime) + if err != nil { + if errors.IsNotFound(err) { + // Request object not found, could have been deleted after reconcile request. + // Return and don't requeue + log.Info("VLLMRuntime resource not found. Ignoring since object must be deleted") + return ctrl.Result{}, nil + } + // Error reading the object - requeue the request. 
+ log.Error(err, "Failed to get VLLMRuntime") + return ctrl.Result{}, err + } + + // Check if the service already exists, if not create a new one + foundService := &corev1.Service{} + err = r.Get(ctx, types.NamespacedName{Name: vllmRuntime.Name, Namespace: vllmRuntime.Namespace}, foundService) + if err != nil && errors.IsNotFound(err) { + // Define a new service + svc := r.serviceForVLLMRuntime(vllmRuntime) + log.Info("Creating a new Service", "Service.Namespace", svc.Namespace, "Service.Name", svc.Name) + err = r.Create(ctx, svc) + if err != nil { + log.Error(err, "Failed to create new Service", "Service.Namespace", svc.Namespace, "Service.Name", svc.Name) + return ctrl.Result{}, err + } + // Service created successfully - return and requeue + return ctrl.Result{Requeue: true}, nil + } else if err != nil { + log.Error(err, "Failed to get Service") + return ctrl.Result{}, err + } + + // Update the service if needed + if r.serviceNeedsUpdate(foundService, vllmRuntime) { + log.Info("Updating Service", "Service.Namespace", foundService.Namespace, "Service.Name", foundService.Name) + // Create new service spec + newSvc := r.serviceForVLLMRuntime(vllmRuntime) + + err = r.Update(ctx, newSvc) + if err != nil { + log.Error(err, "Failed to update Service", "Service.Namespace", foundService.Namespace, "Service.Name", foundService.Name) + return ctrl.Result{}, err + } + // Service updated successfully - return and requeue + return ctrl.Result{Requeue: true}, nil + } + + // Check if the deployment already exists, if not create a new one + found := &appsv1.Deployment{} + err = r.Get(ctx, types.NamespacedName{Name: vllmRuntime.Name, Namespace: vllmRuntime.Namespace}, found) + if err != nil && errors.IsNotFound(err) { + // Define a new deployment + dep := r.deploymentForVLLMRuntime(vllmRuntime) + log.Info("Creating a new Deployment", "Deployment.Namespace", dep.Namespace, "Deployment.Name", dep.Name) + err = r.Create(ctx, dep) + if err != nil { + log.Error(err, "Failed to create 
new Deployment", "Deployment.Namespace", dep.Namespace, "Deployment.Name", dep.Name) + return ctrl.Result{}, err + } + // Deployment created successfully - return and requeue + return ctrl.Result{Requeue: true}, nil + } else if err != nil { + log.Error(err, "Failed to get Deployment") + return ctrl.Result{}, err + } + + // Update the deployment if needed + if r.deploymentNeedsUpdate(found, vllmRuntime) { + log.Info("Updating Deployment", "Deployment.Namespace", found.Namespace, "Deployment.Name", found.Name) + // Create new deployment spec + newDep := r.deploymentForVLLMRuntime(vllmRuntime) + + err = r.Update(ctx, newDep) + if err != nil { + log.Error(err, "Failed to update Deployment", "Deployment.Namespace", found.Namespace, "Deployment.Name", found.Name) + return ctrl.Result{}, err + } + // Deployment updated successfully - return and requeue + return ctrl.Result{Requeue: true}, nil + } + + // Update the status + if err := r.updateStatus(ctx, vllmRuntime, found); err != nil { + log.Error(err, "Failed to update VLLMRuntime status") + return ctrl.Result{}, err + } + + return ctrl.Result{}, nil +} + +// deploymentForVLLMRuntime returns a VLLMRuntime Deployment object +func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *servingv1alpha1.VLLMRuntime) *appsv1.Deployment { + labels := map[string]string{ + "app": vllmRuntime.Name, + } + + // Build command line arguments + args := []string{ + "--model", + vllmRuntime.Spec.Model.ModelURL, + "--host", + "0.0.0.0", + "--port", + fmt.Sprintf("%d", vllmRuntime.Spec.Port), + } + + if vllmRuntime.Spec.Model.EnableLoRA { + args = append(args, "--enable-lora") + } + + if vllmRuntime.Spec.Model.EnableTool { + args = append(args, "--enable-auto-tool-choice") + } + + if vllmRuntime.Spec.Model.ToolCallParser != "" { + args = append(args, "--tool-call-parser", vllmRuntime.Spec.Model.ToolCallParser) + } + + if vllmRuntime.Spec.EnableChunkedPrefill { + args = append(args, "--enable-chunked-prefill") + } else { + args = 
append(args, "--no-enable-chunked-prefill") + } + + if vllmRuntime.Spec.EnablePrefixCaching { + args = append(args, "--enable-prefix-caching") + } else { + args = append(args, "--no-enable-prefix-caching") + } + + if vllmRuntime.Spec.Model.MaxModelLen > 0 { + args = append(args, "--max-model-len", fmt.Sprintf("%d", vllmRuntime.Spec.Model.MaxModelLen)) + } + + if vllmRuntime.Spec.Model.DType != "" { + args = append(args, "--dtype", vllmRuntime.Spec.Model.DType) + } + + if vllmRuntime.Spec.TensorParallelSize > 0 { + args = append(args, "--tensor-parallel-size", fmt.Sprintf("%d", vllmRuntime.Spec.TensorParallelSize)) + } + + if vllmRuntime.Spec.Model.MaxNumSeqs > 0 { + args = append(args, "--max-num-seqs", fmt.Sprintf("%d", vllmRuntime.Spec.Model.MaxNumSeqs)) + } + + if vllmRuntime.Spec.GpuMemoryUtilization != "" { + args = append(args, "--gpu_memory_utilization", vllmRuntime.Spec.GpuMemoryUtilization) + } + + if vllmRuntime.Spec.MaxLoras > 0 { + args = append(args, "--max_loras", fmt.Sprintf("%d", vllmRuntime.Spec.MaxLoras)) + } + + if vllmRuntime.Spec.ExtraArgs != nil { + args = append(args, vllmRuntime.Spec.ExtraArgs...) 
+ } + + // Build environment variables + env := []corev1.EnvVar{} + if vllmRuntime.Spec.V1 { + env = append(env, corev1.EnvVar{ + Name: "VLLM_USE_V1", + Value: "1", + }) + } else { + env = append(env, corev1.EnvVar{ + Name: "VLLM_USE_V1", + Value: "0", + }) + } + + // LM Cache configuration + if vllmRuntime.Spec.LMCacheConfig.Enabled { + env = append(env, + corev1.EnvVar{ + Name: "LMCACHE_LOG_LEVEL", + Value: "DEBUG", + }, + corev1.EnvVar{ + Name: "LMCACHE_USE_EXPERIMENTAL", + Value: "True", + }, + corev1.EnvVar{ + Name: "VLLM_RPC_TIMEOUT", + Value: "1000000", + }, + ) + + if vllmRuntime.Spec.LMCacheConfig.CPUOffloadingBufferSize != "" { + env = append(env, + corev1.EnvVar{ + Name: "LMCACHE_LOCAL_CPU", + Value: "True", + }, + corev1.EnvVar{ + Name: "LMCACHE_MAX_LOCAL_CPU_SIZE", + Value: vllmRuntime.Spec.LMCacheConfig.CPUOffloadingBufferSize, + }, + ) + } + + if vllmRuntime.Spec.LMCacheConfig.DiskOffloadingBufferSize != "" { + env = append(env, + corev1.EnvVar{ + Name: "LMCACHE_LOCAL_DISK", + Value: "True", + }, + corev1.EnvVar{ + Name: "LMCACHE_MAX_LOCAL_DISK_SIZE", + Value: vllmRuntime.Spec.LMCacheConfig.DiskOffloadingBufferSize, + }, + ) + } + + if vllmRuntime.Spec.LMCacheConfig.RemoteURL != "" { + env = append(env, + corev1.EnvVar{ + Name: "LMCACHE_REMOTE_URL", + Value: vllmRuntime.Spec.LMCacheConfig.RemoteURL, + }, + corev1.EnvVar{ + Name: "LMCACHE_REMOTE_SERDE", + Value: vllmRuntime.Spec.LMCacheConfig.RemoteSerde, + }, + ) + } + } + + // Add user-defined environment variables + if vllmRuntime.Spec.Env != nil { + for _, e := range vllmRuntime.Spec.Env { + env = append(env, corev1.EnvVar{ + Name: e.Name, + Value: e.Value, + }) + } + } + + // Build resource requirements + resources := corev1.ResourceRequirements{ + Requests: corev1.ResourceList{}, + Limits: corev1.ResourceList{}, + } + + if vllmRuntime.Spec.Resources.CPU != "" { + resources.Requests[corev1.ResourceCPU] = resource.MustParse(vllmRuntime.Spec.Resources.CPU) + resources.Limits[corev1.ResourceCPU] = 
resource.MustParse(vllmRuntime.Spec.Resources.CPU) + } + + if vllmRuntime.Spec.Resources.Memory != "" { + resources.Requests[corev1.ResourceMemory] = resource.MustParse(vllmRuntime.Spec.Resources.Memory) + resources.Limits[corev1.ResourceMemory] = resource.MustParse(vllmRuntime.Spec.Resources.Memory) + } + + if vllmRuntime.Spec.Resources.GPU != "" { + // Parse GPU resource as a decimal value + gpuResource := resource.MustParse(vllmRuntime.Spec.Resources.GPU) + resources.Requests["nvidia.com/gpu"] = gpuResource + resources.Limits["nvidia.com/gpu"] = gpuResource + } + + // Get the image from Image spec or use default + image := vllmRuntime.Spec.Image.Registry + "/" + vllmRuntime.Spec.Image.Name + + // Get the image pull policy + imagePullPolicy := corev1.PullIfNotPresent + if vllmRuntime.Spec.Image.PullPolicy != "" { + imagePullPolicy = corev1.PullPolicy(vllmRuntime.Spec.Image.PullPolicy) + } + + // Build image pull secrets + var imagePullSecrets []corev1.LocalObjectReference + if vllmRuntime.Spec.Image.PullSecretName != "" { + imagePullSecrets = append(imagePullSecrets, corev1.LocalObjectReference{ + Name: vllmRuntime.Spec.Image.PullSecretName, + }) + } + + if vllmRuntime.Spec.HFTokenSecret.Name != "" { + env = append(env, corev1.EnvVar{ + Name: "HF_TOKEN", + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: vllmRuntime.Spec.HFTokenSecret, + Key: vllmRuntime.Spec.HFTokenName, + }, + }, + }) + } + + dep := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: vllmRuntime.Name, + Namespace: vllmRuntime.Namespace, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: &vllmRuntime.Spec.Replicas, + Strategy: appsv1.DeploymentStrategy{ + Type: appsv1.DeploymentStrategyType(vllmRuntime.Spec.DeployStrategy), + }, + Selector: &metav1.LabelSelector{ + MatchLabels: labels, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: labels, + }, + Spec: corev1.PodSpec{ + ImagePullSecrets: 
imagePullSecrets, + Containers: []corev1.Container{ + { + Name: "vllm", + Image: image, + ImagePullPolicy: imagePullPolicy, + Command: []string{"python3", "-m", "vllm.entrypoints.openai.api_server"}, + Args: args, + Env: env, + Ports: []corev1.ContainerPort{ + { + Name: "http", + ContainerPort: vllmRuntime.Spec.Port, + }, + }, + Resources: resources, + }, + }, + }, + }, + }, + } + + // Set the owner reference + ctrl.SetControllerReference(vllmRuntime, dep, r.Scheme) + return dep +} + +// deploymentNeedsUpdate checks if the deployment needs to be updated +func (r *VLLMRuntimeReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, vr *servingv1alpha1.VLLMRuntime) bool { + // Generate the expected deployment + expectedDep := r.deploymentForVLLMRuntime(vr) + + // Compare model URL + expectedModelURL := vr.Spec.Model.ModelURL + actualModelURL := "" + // For vllm serve, the model URL is the first argument after the command + if len(dep.Spec.Template.Spec.Containers[0].Args) > 0 { + actualModelURL = dep.Spec.Template.Spec.Containers[0].Args[1] + } + if expectedModelURL != actualModelURL { + return true + } + + // Compare port + expectedPort := vr.Spec.Port + actualPort := dep.Spec.Template.Spec.Containers[0].Ports[0].ContainerPort + if expectedPort != actualPort { + return true + } + + // Compare image + if expectedDep.Spec.Template.Spec.Containers[0].Image != dep.Spec.Template.Spec.Containers[0].Image { + return true + } + + // Compare resources + expectedResources := expectedDep.Spec.Template.Spec.Containers[0].Resources + actualResources := dep.Spec.Template.Spec.Containers[0].Resources + if !reflect.DeepEqual(expectedResources, actualResources) { + return true + } + + // Compare LM Cache configuration + expectedLMCacheConfig := vr.Spec.LMCacheConfig + actualLMCacheConfig := dep.Spec.Template.Spec.Containers[0].Env + + // Extract actual values from environment variables + actualEnabled := false + actualCPUOffloadingBufferSize := "" + actualDiskOffloadingBufferSize := 
"" + + for _, env := range actualLMCacheConfig { + switch env.Name { + case "LMCACHE_USE_EXPERIMENTAL": + actualEnabled = env.Value == "True" + case "LMCACHE_MAX_LOCAL_CPU_SIZE": + actualCPUOffloadingBufferSize = env.Value + case "LMCACHE_MAX_LOCAL_DISK_SIZE": + actualDiskOffloadingBufferSize = env.Value + } + } + + // Compare specific fields + if expectedLMCacheConfig.Enabled != actualEnabled || + expectedLMCacheConfig.CPUOffloadingBufferSize != actualCPUOffloadingBufferSize || + expectedLMCacheConfig.DiskOffloadingBufferSize != actualDiskOffloadingBufferSize { + return true + } + + return false +} + +// updateStatus updates the status of the VLLMRuntime +func (r *VLLMRuntimeReconciler) updateStatus(ctx context.Context, vr *servingv1alpha1.VLLMRuntime, dep *appsv1.Deployment) error { + // Re-read the VLLMRuntime to get the latest version + latestVR := &servingv1alpha1.VLLMRuntime{} + if err := r.Get(ctx, types.NamespacedName{Name: vr.Name, Namespace: vr.Namespace}, latestVR); err != nil { + return err + } + + latestVR.Status.LastUpdated = metav1.Now() + + // Update model status based on deployment status + if dep.Status.AvailableReplicas > 0 { + latestVR.Status.ModelStatus = "Ready" + } else if dep.Status.UpdatedReplicas > 0 { + // If we have updated replicas but they're not yet available, mark as updating + latestVR.Status.ModelStatus = "Updating" + } else { + latestVR.Status.ModelStatus = "NotReady" + } + + return r.Status().Update(ctx, latestVR) +} + +// serviceForVLLMRuntime returns a VLLMRuntime Service object +func (r *VLLMRuntimeReconciler) serviceForVLLMRuntime(vllmRuntime *servingv1alpha1.VLLMRuntime) *corev1.Service { + labels := map[string]string{ + "app": vllmRuntime.Name, + } + + svc := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: vllmRuntime.Name, + Namespace: vllmRuntime.Namespace, + }, + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeClusterIP, + Selector: labels, + Ports: []corev1.ServicePort{ + { + Name: "http", + Port: 80, + 
TargetPort: intstr.FromInt(int(vllmRuntime.Spec.Port)), + Protocol: corev1.ProtocolTCP, + }, + }, + }, + } + + // Set the owner reference + ctrl.SetControllerReference(vllmRuntime, svc, r.Scheme) + return svc +} + +// serviceNeedsUpdate checks if the service needs to be updated +func (r *VLLMRuntimeReconciler) serviceNeedsUpdate(svc *corev1.Service, vr *servingv1alpha1.VLLMRuntime) bool { + // Compare target port + expectedTargetPort := int(vr.Spec.Port) + actualTargetPort := svc.Spec.Ports[0].TargetPort.IntValue() + if expectedTargetPort != actualTargetPort { + return true + } + + return false +} + +// SetupWithManager sets up the controller with the Manager. +func (r *VLLMRuntimeReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + For(&servingv1alpha1.VLLMRuntime{}). + Owns(&appsv1.Deployment{}). + Owns(&corev1.Service{}). + Complete(r) +} diff --git a/internal/controller/vllmruntime_controller_test.go b/internal/controller/vllmruntime_controller_test.go new file mode 100644 index 000000000..9f07fc776 --- /dev/null +++ b/internal/controller/vllmruntime_controller_test.go @@ -0,0 +1,84 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "context" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + servingv1alpha1 "production-stack/api/v1alpha1" +) + +var _ = Describe("VLLMRuntime Controller", func() { + Context("When reconciling a resource", func() { + const resourceName = "test-resource" + + ctx := context.Background() + + typeNamespacedName := types.NamespacedName{ + Name: resourceName, + Namespace: "default", // TODO(user):Modify as needed + } + vllmruntime := &servingv1alpha1.VLLMRuntime{} + + BeforeEach(func() { + By("creating the custom resource for the Kind VLLMRuntime") + err := k8sClient.Get(ctx, typeNamespacedName, vllmruntime) + if err != nil && errors.IsNotFound(err) { + resource := &servingv1alpha1.VLLMRuntime{ + ObjectMeta: metav1.ObjectMeta{ + Name: resourceName, + Namespace: "default", + }, + // TODO(user): Specify other spec details if needed. + } + Expect(k8sClient.Create(ctx, resource)).To(Succeed()) + } + }) + + AfterEach(func() { + // TODO(user): Cleanup logic after each test, like removing the resource instance. + resource := &servingv1alpha1.VLLMRuntime{} + err := k8sClient.Get(ctx, typeNamespacedName, resource) + Expect(err).NotTo(HaveOccurred()) + + By("Cleanup the specific resource instance VLLMRuntime") + Expect(k8sClient.Delete(ctx, resource)).To(Succeed()) + }) + It("should successfully reconcile the resource", func() { + By("Reconciling the created resource") + controllerReconciler := &VLLMRuntimeReconciler{ + Client: k8sClient, + Scheme: k8sClient.Scheme(), + } + + _, err := controllerReconciler.Reconcile(ctx, reconcile.Request{ + NamespacedName: typeNamespacedName, + }) + Expect(err).NotTo(HaveOccurred()) + // TODO(user): Add more specific assertions depending on your controller's reconciliation logic. + // Example: If you expect a certain status condition after reconciliation, verify it here. 
+ }) + }) +}) From 7907cd0ae4f52e4b46979b6d0f43d0802bd7a781 Mon Sep 17 00:00:00 2001 From: royyhuang Date: Fri, 9 May 2025 01:13:09 +0000 Subject: [PATCH 02/14] move opertor to a secondary dir instead of in root dir Signed-off-by: royyhuang --- .gitignore | 3 + ...production-stack.vllm.ai_staticroutes.yaml | 218 ------------------ .dockerignore => operator/.dockerignore | 0 Dockerfile => operator/Dockerfile | 0 Makefile => operator/Makefile | 0 PROJECT => operator/PROJECT | 0 .../api}/v1alpha1/groupversion_info.go | 0 .../api}/v1alpha1/vllmrouter_types.go | 0 .../api}/v1alpha1/vllmruntime_types.go | 0 .../api}/v1alpha1/zz_generated.deepcopy.go | 0 {cmd => operator/cmd}/main.go | 0 .../bases/serving.vllm.ai_vllmrouters.yaml | 0 .../bases/serving.vllm.ai_vllmruntimes.yaml | 0 .../config}/crd/kustomization.yaml | 0 .../config}/crd/kustomizeconfig.yaml | 0 .../default/cert_metrics_manager_patch.yaml | 0 .../config}/default/kustomization.yaml | 0 .../default/manager_metrics_patch.yaml | 0 .../config}/default/metrics_service.yaml | 0 .../config}/manager/deployment.yaml | 0 .../config}/manager/kustomization.yaml | 0 .../config}/manager/namespace.yaml | 0 .../network-policy/allow-metrics-traffic.yaml | 0 .../config}/network-policy/kustomization.yaml | 0 .../config}/prometheus/kustomization.yaml | 0 .../config}/prometheus/monitor.yaml | 0 .../config}/prometheus/monitor_tls_patch.yaml | 0 .../config}/rbac/kustomization.yaml | 0 .../config}/rbac/leader_election_role.yaml | 0 .../rbac/leader_election_role_binding.yaml | 0 .../config}/rbac/metrics_auth_role.yaml | 0 .../rbac/metrics_auth_role_binding.yaml | 0 .../config}/rbac/metrics_reader_role.yaml | 0 .../config}/rbac/pod_viewer_role.yaml | 0 {config => operator/config}/rbac/role.yaml | 34 --- .../config}/rbac/role_binding.yaml | 0 .../config}/rbac/service_account.yaml | 0 .../config}/rbac/vllmrouter_admin_role.yaml | 0 .../config}/rbac/vllmrouter_editor_role.yaml | 0 .../config}/rbac/vllmrouter_role_binding.yaml | 0 
.../rbac/vllmrouter_service_account.yaml | 0 .../config}/rbac/vllmrouter_viewer_role.yaml | 0 .../config}/rbac/vllmruntime_admin_role.yaml | 0 .../config}/rbac/vllmruntime_editor_role.yaml | 0 .../config}/rbac/vllmruntime_viewer_role.yaml | 0 .../config}/samples/kustomization.yaml | 0 .../samples/serving_v1alpha1_router.yaml | 0 .../samples/serving_v1alpha1_vllmruntime.yaml | 0 go.mod => operator/go.mod | 0 go.sum => operator/go.sum | 0 {hack => operator/hack}/boilerplate.go.txt | 0 .../internal}/controller/suite_test.go | 0 .../controller/vllmrouter_controller.go | 0 .../controller/vllmrouter_controller_test.go | 0 .../controller/vllmruntime_controller.go | 0 .../controller/vllmruntime_controller_test.go | 0 56 files changed, 3 insertions(+), 252 deletions(-) delete mode 100644 config/crd/bases/production-stack.vllm.ai_staticroutes.yaml rename .dockerignore => operator/.dockerignore (100%) rename Dockerfile => operator/Dockerfile (100%) rename Makefile => operator/Makefile (100%) rename PROJECT => operator/PROJECT (100%) rename {api => operator/api}/v1alpha1/groupversion_info.go (100%) rename {api => operator/api}/v1alpha1/vllmrouter_types.go (100%) rename {api => operator/api}/v1alpha1/vllmruntime_types.go (100%) rename {api => operator/api}/v1alpha1/zz_generated.deepcopy.go (100%) rename {cmd => operator/cmd}/main.go (100%) rename {config => operator/config}/crd/bases/serving.vllm.ai_vllmrouters.yaml (100%) rename {config => operator/config}/crd/bases/serving.vllm.ai_vllmruntimes.yaml (100%) rename {config => operator/config}/crd/kustomization.yaml (100%) rename {config => operator/config}/crd/kustomizeconfig.yaml (100%) rename {config => operator/config}/default/cert_metrics_manager_patch.yaml (100%) rename {config => operator/config}/default/kustomization.yaml (100%) rename {config => operator/config}/default/manager_metrics_patch.yaml (100%) rename {config => operator/config}/default/metrics_service.yaml (100%) rename {config => 
operator/config}/manager/deployment.yaml (100%) rename {config => operator/config}/manager/kustomization.yaml (100%) rename {config => operator/config}/manager/namespace.yaml (100%) rename {config => operator/config}/network-policy/allow-metrics-traffic.yaml (100%) rename {config => operator/config}/network-policy/kustomization.yaml (100%) rename {config => operator/config}/prometheus/kustomization.yaml (100%) rename {config => operator/config}/prometheus/monitor.yaml (100%) rename {config => operator/config}/prometheus/monitor_tls_patch.yaml (100%) rename {config => operator/config}/rbac/kustomization.yaml (100%) rename {config => operator/config}/rbac/leader_election_role.yaml (100%) rename {config => operator/config}/rbac/leader_election_role_binding.yaml (100%) rename {config => operator/config}/rbac/metrics_auth_role.yaml (100%) rename {config => operator/config}/rbac/metrics_auth_role_binding.yaml (100%) rename {config => operator/config}/rbac/metrics_reader_role.yaml (100%) rename {config => operator/config}/rbac/pod_viewer_role.yaml (100%) rename {config => operator/config}/rbac/role.yaml (63%) rename {config => operator/config}/rbac/role_binding.yaml (100%) rename {config => operator/config}/rbac/service_account.yaml (100%) rename {config => operator/config}/rbac/vllmrouter_admin_role.yaml (100%) rename {config => operator/config}/rbac/vllmrouter_editor_role.yaml (100%) rename {config => operator/config}/rbac/vllmrouter_role_binding.yaml (100%) rename {config => operator/config}/rbac/vllmrouter_service_account.yaml (100%) rename {config => operator/config}/rbac/vllmrouter_viewer_role.yaml (100%) rename {config => operator/config}/rbac/vllmruntime_admin_role.yaml (100%) rename {config => operator/config}/rbac/vllmruntime_editor_role.yaml (100%) rename {config => operator/config}/rbac/vllmruntime_viewer_role.yaml (100%) rename {config => operator/config}/samples/kustomization.yaml (100%) rename {config => operator/config}/samples/serving_v1alpha1_router.yaml 
(100%) rename {config => operator/config}/samples/serving_v1alpha1_vllmruntime.yaml (100%) rename go.mod => operator/go.mod (100%) rename go.sum => operator/go.sum (100%) rename {hack => operator/hack}/boilerplate.go.txt (100%) rename {internal => operator/internal}/controller/suite_test.go (100%) rename {internal => operator/internal}/controller/vllmrouter_controller.go (100%) rename {internal => operator/internal}/controller/vllmrouter_controller_test.go (100%) rename {internal => operator/internal}/controller/vllmruntime_controller.go (100%) rename {internal => operator/internal}/controller/vllmruntime_controller_test.go (100%) diff --git a/.gitignore b/.gitignore index 1341491df..b0adb89f1 100644 --- a/.gitignore +++ b/.gitignore @@ -178,3 +178,6 @@ docs/book/src/docs /testdata/**/go.sum /docs/book/src/simple-external-plugin-tutorial/testdata/sampleexternalplugin/v1/bin /testdata/**legacy** + +# skip operator's test for now +operator/test diff --git a/config/crd/bases/production-stack.vllm.ai_staticroutes.yaml b/config/crd/bases/production-stack.vllm.ai_staticroutes.yaml deleted file mode 100644 index cd6dd48bd..000000000 --- a/config/crd/bases/production-stack.vllm.ai_staticroutes.yaml +++ /dev/null @@ -1,218 +0,0 @@ ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.17.2 - name: staticroutes.production-stack.vllm.ai -spec: - group: production-stack.vllm.ai - names: - kind: StaticRoute - listKind: StaticRouteList - plural: staticroutes - singular: staticroute - scope: Namespaced - versions: - - name: v1alpha1 - schema: - openAPIV3Schema: - description: StaticRoute is the Schema for the staticroutes API - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. 
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: StaticRouteSpec defines the desired state of StaticRoute - properties: - configMapName: - description: ConfigMapName is the name of the ConfigMap to create - with the dynamic config - type: string - healthCheck: - description: HealthCheck defines the health check configuration for - the router - properties: - failureThreshold: - default: 3 - description: Minimum consecutive failures for the probe to be - considered failed - format: int32 - minimum: 1 - type: integer - periodSeconds: - default: 10 - description: Number of seconds between probe attempts - format: int32 - minimum: 1 - type: integer - successThreshold: - default: 1 - description: Minimum consecutive successes for the probe to be - considered successful - format: int32 - minimum: 1 - type: integer - timeoutSeconds: - default: 5 - description: Number of seconds after which the probe times out - format: int32 - minimum: 1 - type: integer - type: object - routerRef: - description: RouterRef is a reference to the router service - properties: - apiVersion: - description: API version of the referent. - type: string - fieldPath: - description: |- - If referring to a piece of an object instead of an entire object, this string - should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. 
- For example, if the object reference is to a container within a pod, this would take on a value like: - "spec.containers{name}" (where "name" refers to the name of the container that triggered - the event) or if no container name is specified "spec.containers[2]" (container with - index 2 in this pod). This syntax is chosen only to have some well-defined way of - referencing a part of an object. - type: string - kind: - description: |- - Kind of the referent. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - namespace: - description: |- - Namespace of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ - type: string - resourceVersion: - description: |- - Specific resourceVersion to which this reference is made, if any. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency - type: string - uid: - description: |- - UID of the referent. 
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids - type: string - type: object - x-kubernetes-map-type: atomic - routingLogic: - default: roundrobin - description: RoutingLogic specifies the routing logic to use - enum: - - roundrobin - type: string - serviceDiscovery: - default: static - description: ServiceDiscovery specifies the service discovery method - enum: - - static - type: string - staticBackends: - description: StaticBackends is a comma-separated list of backend URLs - type: string - staticModels: - description: StaticModels is a comma-separated list of model names - type: string - required: - - routingLogic - - serviceDiscovery - - staticBackends - - staticModels - type: object - status: - description: StaticRouteStatus defines the observed state of StaticRoute - properties: - conditions: - description: Conditions represent the latest available observations - of the StaticRoute's state - items: - description: Condition contains details for one aspect of the current - state of this API Resource. - properties: - lastTransitionTime: - description: |- - lastTransitionTime is the last time the condition transitioned from one status to another. - This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. - format: date-time - type: string - message: - description: |- - message is a human readable message indicating details about the transition. - This may be an empty string. - maxLength: 32768 - type: string - observedGeneration: - description: |- - observedGeneration represents the .metadata.generation that the condition was set based upon. - For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date - with respect to the current state of the instance. 
- format: int64 - minimum: 0 - type: integer - reason: - description: |- - reason contains a programmatic identifier indicating the reason for the condition's last transition. - Producers of specific condition types may define expected values and meanings for this field, - and whether the values are considered a guaranteed API. - The value should be a CamelCase string. - This field may not be empty. - maxLength: 1024 - minLength: 1 - pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ - type: string - status: - description: status of the condition, one of True, False, Unknown. - enum: - - "True" - - "False" - - Unknown - type: string - type: - description: type of condition in CamelCase or in foo.example.com/CamelCase. - maxLength: 316 - pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ - type: string - required: - - lastTransitionTime - - message - - reason - - status - - type - type: object - type: array - configMapRef: - description: ConfigMapRef is a reference to the created ConfigMap - type: string - lastAppliedTime: - description: LastAppliedTime is the last time the configuration was - applied to the router - format: date-time - type: string - type: object - type: object - served: true - storage: true - subresources: - status: {} diff --git a/.dockerignore b/operator/.dockerignore similarity index 100% rename from .dockerignore rename to operator/.dockerignore diff --git a/Dockerfile b/operator/Dockerfile similarity index 100% rename from Dockerfile rename to operator/Dockerfile diff --git a/Makefile b/operator/Makefile similarity index 100% rename from Makefile rename to operator/Makefile diff --git a/PROJECT b/operator/PROJECT similarity index 100% rename from PROJECT rename to operator/PROJECT diff --git a/api/v1alpha1/groupversion_info.go b/operator/api/v1alpha1/groupversion_info.go similarity index 100% rename from api/v1alpha1/groupversion_info.go rename to 
operator/api/v1alpha1/groupversion_info.go diff --git a/api/v1alpha1/vllmrouter_types.go b/operator/api/v1alpha1/vllmrouter_types.go similarity index 100% rename from api/v1alpha1/vllmrouter_types.go rename to operator/api/v1alpha1/vllmrouter_types.go diff --git a/api/v1alpha1/vllmruntime_types.go b/operator/api/v1alpha1/vllmruntime_types.go similarity index 100% rename from api/v1alpha1/vllmruntime_types.go rename to operator/api/v1alpha1/vllmruntime_types.go diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/operator/api/v1alpha1/zz_generated.deepcopy.go similarity index 100% rename from api/v1alpha1/zz_generated.deepcopy.go rename to operator/api/v1alpha1/zz_generated.deepcopy.go diff --git a/cmd/main.go b/operator/cmd/main.go similarity index 100% rename from cmd/main.go rename to operator/cmd/main.go diff --git a/config/crd/bases/serving.vllm.ai_vllmrouters.yaml b/operator/config/crd/bases/serving.vllm.ai_vllmrouters.yaml similarity index 100% rename from config/crd/bases/serving.vllm.ai_vllmrouters.yaml rename to operator/config/crd/bases/serving.vllm.ai_vllmrouters.yaml diff --git a/config/crd/bases/serving.vllm.ai_vllmruntimes.yaml b/operator/config/crd/bases/serving.vllm.ai_vllmruntimes.yaml similarity index 100% rename from config/crd/bases/serving.vllm.ai_vllmruntimes.yaml rename to operator/config/crd/bases/serving.vllm.ai_vllmruntimes.yaml diff --git a/config/crd/kustomization.yaml b/operator/config/crd/kustomization.yaml similarity index 100% rename from config/crd/kustomization.yaml rename to operator/config/crd/kustomization.yaml diff --git a/config/crd/kustomizeconfig.yaml b/operator/config/crd/kustomizeconfig.yaml similarity index 100% rename from config/crd/kustomizeconfig.yaml rename to operator/config/crd/kustomizeconfig.yaml diff --git a/config/default/cert_metrics_manager_patch.yaml b/operator/config/default/cert_metrics_manager_patch.yaml similarity index 100% rename from config/default/cert_metrics_manager_patch.yaml rename to 
operator/config/default/cert_metrics_manager_patch.yaml diff --git a/config/default/kustomization.yaml b/operator/config/default/kustomization.yaml similarity index 100% rename from config/default/kustomization.yaml rename to operator/config/default/kustomization.yaml diff --git a/config/default/manager_metrics_patch.yaml b/operator/config/default/manager_metrics_patch.yaml similarity index 100% rename from config/default/manager_metrics_patch.yaml rename to operator/config/default/manager_metrics_patch.yaml diff --git a/config/default/metrics_service.yaml b/operator/config/default/metrics_service.yaml similarity index 100% rename from config/default/metrics_service.yaml rename to operator/config/default/metrics_service.yaml diff --git a/config/manager/deployment.yaml b/operator/config/manager/deployment.yaml similarity index 100% rename from config/manager/deployment.yaml rename to operator/config/manager/deployment.yaml diff --git a/config/manager/kustomization.yaml b/operator/config/manager/kustomization.yaml similarity index 100% rename from config/manager/kustomization.yaml rename to operator/config/manager/kustomization.yaml diff --git a/config/manager/namespace.yaml b/operator/config/manager/namespace.yaml similarity index 100% rename from config/manager/namespace.yaml rename to operator/config/manager/namespace.yaml diff --git a/config/network-policy/allow-metrics-traffic.yaml b/operator/config/network-policy/allow-metrics-traffic.yaml similarity index 100% rename from config/network-policy/allow-metrics-traffic.yaml rename to operator/config/network-policy/allow-metrics-traffic.yaml diff --git a/config/network-policy/kustomization.yaml b/operator/config/network-policy/kustomization.yaml similarity index 100% rename from config/network-policy/kustomization.yaml rename to operator/config/network-policy/kustomization.yaml diff --git a/config/prometheus/kustomization.yaml b/operator/config/prometheus/kustomization.yaml similarity index 100% rename from 
config/prometheus/kustomization.yaml rename to operator/config/prometheus/kustomization.yaml diff --git a/config/prometheus/monitor.yaml b/operator/config/prometheus/monitor.yaml similarity index 100% rename from config/prometheus/monitor.yaml rename to operator/config/prometheus/monitor.yaml diff --git a/config/prometheus/monitor_tls_patch.yaml b/operator/config/prometheus/monitor_tls_patch.yaml similarity index 100% rename from config/prometheus/monitor_tls_patch.yaml rename to operator/config/prometheus/monitor_tls_patch.yaml diff --git a/config/rbac/kustomization.yaml b/operator/config/rbac/kustomization.yaml similarity index 100% rename from config/rbac/kustomization.yaml rename to operator/config/rbac/kustomization.yaml diff --git a/config/rbac/leader_election_role.yaml b/operator/config/rbac/leader_election_role.yaml similarity index 100% rename from config/rbac/leader_election_role.yaml rename to operator/config/rbac/leader_election_role.yaml diff --git a/config/rbac/leader_election_role_binding.yaml b/operator/config/rbac/leader_election_role_binding.yaml similarity index 100% rename from config/rbac/leader_election_role_binding.yaml rename to operator/config/rbac/leader_election_role_binding.yaml diff --git a/config/rbac/metrics_auth_role.yaml b/operator/config/rbac/metrics_auth_role.yaml similarity index 100% rename from config/rbac/metrics_auth_role.yaml rename to operator/config/rbac/metrics_auth_role.yaml diff --git a/config/rbac/metrics_auth_role_binding.yaml b/operator/config/rbac/metrics_auth_role_binding.yaml similarity index 100% rename from config/rbac/metrics_auth_role_binding.yaml rename to operator/config/rbac/metrics_auth_role_binding.yaml diff --git a/config/rbac/metrics_reader_role.yaml b/operator/config/rbac/metrics_reader_role.yaml similarity index 100% rename from config/rbac/metrics_reader_role.yaml rename to operator/config/rbac/metrics_reader_role.yaml diff --git a/config/rbac/pod_viewer_role.yaml 
b/operator/config/rbac/pod_viewer_role.yaml similarity index 100% rename from config/rbac/pod_viewer_role.yaml rename to operator/config/rbac/pod_viewer_role.yaml diff --git a/config/rbac/role.yaml b/operator/config/rbac/role.yaml similarity index 63% rename from config/rbac/role.yaml rename to operator/config/rbac/role.yaml index 7109b302c..04f6f081a 100644 --- a/config/rbac/role.yaml +++ b/operator/config/rbac/role.yaml @@ -18,14 +18,6 @@ rules: - patch - update - watch -- apiGroups: - - "" - resources: - - pods - verbs: - - get - - list - - watch - apiGroups: - apps resources: @@ -38,32 +30,6 @@ rules: - patch - update - watch -- apiGroups: - - production-stack.vllm.ai - resources: - - staticroutes - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - production-stack.vllm.ai - resources: - - staticroutes/finalizers - verbs: - - update -- apiGroups: - - production-stack.vllm.ai - resources: - - staticroutes/status - verbs: - - get - - patch - - update - apiGroups: - serving.vllm.ai resources: diff --git a/config/rbac/role_binding.yaml b/operator/config/rbac/role_binding.yaml similarity index 100% rename from config/rbac/role_binding.yaml rename to operator/config/rbac/role_binding.yaml diff --git a/config/rbac/service_account.yaml b/operator/config/rbac/service_account.yaml similarity index 100% rename from config/rbac/service_account.yaml rename to operator/config/rbac/service_account.yaml diff --git a/config/rbac/vllmrouter_admin_role.yaml b/operator/config/rbac/vllmrouter_admin_role.yaml similarity index 100% rename from config/rbac/vllmrouter_admin_role.yaml rename to operator/config/rbac/vllmrouter_admin_role.yaml diff --git a/config/rbac/vllmrouter_editor_role.yaml b/operator/config/rbac/vllmrouter_editor_role.yaml similarity index 100% rename from config/rbac/vllmrouter_editor_role.yaml rename to operator/config/rbac/vllmrouter_editor_role.yaml diff --git a/config/rbac/vllmrouter_role_binding.yaml 
b/operator/config/rbac/vllmrouter_role_binding.yaml similarity index 100% rename from config/rbac/vllmrouter_role_binding.yaml rename to operator/config/rbac/vllmrouter_role_binding.yaml diff --git a/config/rbac/vllmrouter_service_account.yaml b/operator/config/rbac/vllmrouter_service_account.yaml similarity index 100% rename from config/rbac/vllmrouter_service_account.yaml rename to operator/config/rbac/vllmrouter_service_account.yaml diff --git a/config/rbac/vllmrouter_viewer_role.yaml b/operator/config/rbac/vllmrouter_viewer_role.yaml similarity index 100% rename from config/rbac/vllmrouter_viewer_role.yaml rename to operator/config/rbac/vllmrouter_viewer_role.yaml diff --git a/config/rbac/vllmruntime_admin_role.yaml b/operator/config/rbac/vllmruntime_admin_role.yaml similarity index 100% rename from config/rbac/vllmruntime_admin_role.yaml rename to operator/config/rbac/vllmruntime_admin_role.yaml diff --git a/config/rbac/vllmruntime_editor_role.yaml b/operator/config/rbac/vllmruntime_editor_role.yaml similarity index 100% rename from config/rbac/vllmruntime_editor_role.yaml rename to operator/config/rbac/vllmruntime_editor_role.yaml diff --git a/config/rbac/vllmruntime_viewer_role.yaml b/operator/config/rbac/vllmruntime_viewer_role.yaml similarity index 100% rename from config/rbac/vllmruntime_viewer_role.yaml rename to operator/config/rbac/vllmruntime_viewer_role.yaml diff --git a/config/samples/kustomization.yaml b/operator/config/samples/kustomization.yaml similarity index 100% rename from config/samples/kustomization.yaml rename to operator/config/samples/kustomization.yaml diff --git a/config/samples/serving_v1alpha1_router.yaml b/operator/config/samples/serving_v1alpha1_router.yaml similarity index 100% rename from config/samples/serving_v1alpha1_router.yaml rename to operator/config/samples/serving_v1alpha1_router.yaml diff --git a/config/samples/serving_v1alpha1_vllmruntime.yaml b/operator/config/samples/serving_v1alpha1_vllmruntime.yaml similarity 
index 100% rename from config/samples/serving_v1alpha1_vllmruntime.yaml rename to operator/config/samples/serving_v1alpha1_vllmruntime.yaml diff --git a/go.mod b/operator/go.mod similarity index 100% rename from go.mod rename to operator/go.mod diff --git a/go.sum b/operator/go.sum similarity index 100% rename from go.sum rename to operator/go.sum diff --git a/hack/boilerplate.go.txt b/operator/hack/boilerplate.go.txt similarity index 100% rename from hack/boilerplate.go.txt rename to operator/hack/boilerplate.go.txt diff --git a/internal/controller/suite_test.go b/operator/internal/controller/suite_test.go similarity index 100% rename from internal/controller/suite_test.go rename to operator/internal/controller/suite_test.go diff --git a/internal/controller/vllmrouter_controller.go b/operator/internal/controller/vllmrouter_controller.go similarity index 100% rename from internal/controller/vllmrouter_controller.go rename to operator/internal/controller/vllmrouter_controller.go diff --git a/internal/controller/vllmrouter_controller_test.go b/operator/internal/controller/vllmrouter_controller_test.go similarity index 100% rename from internal/controller/vllmrouter_controller_test.go rename to operator/internal/controller/vllmrouter_controller_test.go diff --git a/internal/controller/vllmruntime_controller.go b/operator/internal/controller/vllmruntime_controller.go similarity index 100% rename from internal/controller/vllmruntime_controller.go rename to operator/internal/controller/vllmruntime_controller.go diff --git a/internal/controller/vllmruntime_controller_test.go b/operator/internal/controller/vllmruntime_controller_test.go similarity index 100% rename from internal/controller/vllmruntime_controller_test.go rename to operator/internal/controller/vllmruntime_controller_test.go From c402d55f63c5568c1e92bcacb0e194d7cfecb160 Mon Sep 17 00:00:00 2001 From: royyhuang Date: Fri, 9 May 2025 01:32:18 +0000 Subject: [PATCH 03/14] rename api group from serving.vllm.ai to 
production-stack.vllm.ai Signed-off-by: royyhuang --- operator/PROJECT | 6 ++--- operator/api/v1alpha1/groupversion_info.go | 6 ++--- operator/cmd/main.go | 4 ++-- ...production-stack.vllm.ai_vllmrouters.yaml} | 4 ++-- ...roduction-stack.vllm.ai_vllmruntimes.yaml} | 4 ++-- operator/config/crd/kustomization.yaml | 4 ++-- operator/config/rbac/role.yaml | 6 ++--- .../config/rbac/vllmrouter_admin_role.yaml | 10 ++++---- .../config/rbac/vllmrouter_editor_role.yaml | 6 ++--- .../config/rbac/vllmrouter_viewer_role.yaml | 6 ++--- .../config/rbac/vllmruntime_admin_role.yaml | 6 ++--- .../config/rbac/vllmruntime_editor_role.yaml | 6 ++--- .../config/rbac/vllmruntime_viewer_role.yaml | 6 ++--- operator/config/samples/kustomization.yaml | 4 ++-- .../samples/serving_v1alpha1_router.yaml | 2 +- .../samples/serving_v1alpha1_vllmruntime.yaml | 2 +- .../controller/vllmrouter_controller.go | 8 +++---- .../controller/vllmrouter_controller_test.go | 8 +++---- .../controller/vllmruntime_controller.go | 24 +++++++++---------- .../controller/vllmruntime_controller_test.go | 8 +++---- 20 files changed, 65 insertions(+), 65 deletions(-) rename operator/config/crd/bases/{serving.vllm.ai_vllmrouters.yaml => production-stack.vllm.ai_vllmrouters.yaml} (99%) rename operator/config/crd/bases/{serving.vllm.ai_vllmruntimes.yaml => production-stack.vllm.ai_vllmruntimes.yaml} (98%) diff --git a/operator/PROJECT b/operator/PROJECT index 97a77c2eb..696ec22c5 100644 --- a/operator/PROJECT +++ b/operator/PROJECT @@ -13,7 +13,7 @@ resources: namespaced: true controller: true domain: vllm.ai - group: serving + group: production-stack kind: VLLMRuntime path: production-stack/api/v1alpha1 version: v1alpha1 @@ -22,8 +22,8 @@ resources: namespaced: true controller: true domain: vllm.ai - group: serving - kind: Router + group: production-stack + kind: VLLMRouter path: production-stack/api/v1alpha1 version: v1alpha1 version: "3" diff --git a/operator/api/v1alpha1/groupversion_info.go
b/operator/api/v1alpha1/groupversion_info.go index 9d3c2bf50..25304d68a 100644 --- a/operator/api/v1alpha1/groupversion_info.go +++ b/operator/api/v1alpha1/groupversion_info.go @@ -14,9 +14,9 @@ See the License for the specific language governing permissions and limitations under the License. */ -// Package v1alpha1 contains API Schema definitions for the serving v1alpha1 API group. +// Package v1alpha1 contains API Schema definitions for the production-stack v1alpha1 API group. // +kubebuilder:object:generate=true -// +groupName=serving.vllm.ai +// +groupName=production-stack.vllm.ai package v1alpha1 import ( @@ -26,7 +26,7 @@ import ( var ( // GroupVersion is group version used to register these objects. - GroupVersion = schema.GroupVersion{Group: "serving.vllm.ai", Version: "v1alpha1"} + GroupVersion = schema.GroupVersion{Group: "production-stack.vllm.ai", Version: "v1alpha1"} // SchemeBuilder is used to add go types to the GroupVersionKind scheme. SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} diff --git a/operator/cmd/main.go b/operator/cmd/main.go index 94c41e44b..1a32cd700 100644 --- a/operator/cmd/main.go +++ b/operator/cmd/main.go @@ -37,7 +37,7 @@ import ( metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" "sigs.k8s.io/controller-runtime/pkg/webhook" - servingv1alpha1 "production-stack/api/v1alpha1" + productionstackv1alpha1 "production-stack/api/v1alpha1" "production-stack/internal/controller" // +kubebuilder:scaffold:imports ) @@ -50,7 +50,7 @@ var ( func init() { utilruntime.Must(clientgoscheme.AddToScheme(scheme)) - utilruntime.Must(servingv1alpha1.AddToScheme(scheme)) + utilruntime.Must(productionstackv1alpha1.AddToScheme(scheme)) // +kubebuilder:scaffold:scheme } diff --git a/operator/config/crd/bases/serving.vllm.ai_vllmrouters.yaml b/operator/config/crd/bases/production-stack.vllm.ai_vllmrouters.yaml similarity index 99% rename from operator/config/crd/bases/serving.vllm.ai_vllmrouters.yaml rename to 
operator/config/crd/bases/production-stack.vllm.ai_vllmrouters.yaml index 7445c2e4c..3506964bc 100644 --- a/operator/config/crd/bases/serving.vllm.ai_vllmrouters.yaml +++ b/operator/config/crd/bases/production-stack.vllm.ai_vllmrouters.yaml @@ -4,9 +4,9 @@ kind: CustomResourceDefinition metadata: annotations: controller-gen.kubebuilder.io/version: v0.17.2 - name: vllmrouters.serving.vllm.ai + name: vllmrouters.production-stack.vllm.ai spec: - group: serving.vllm.ai + group: production-stack.vllm.ai names: kind: VLLMRouter listKind: VLLMRouterList diff --git a/operator/config/crd/bases/serving.vllm.ai_vllmruntimes.yaml b/operator/config/crd/bases/production-stack.vllm.ai_vllmruntimes.yaml similarity index 98% rename from operator/config/crd/bases/serving.vllm.ai_vllmruntimes.yaml rename to operator/config/crd/bases/production-stack.vllm.ai_vllmruntimes.yaml index 311dc82a8..c3f21d2ac 100644 --- a/operator/config/crd/bases/serving.vllm.ai_vllmruntimes.yaml +++ b/operator/config/crd/bases/production-stack.vllm.ai_vllmruntimes.yaml @@ -4,9 +4,9 @@ kind: CustomResourceDefinition metadata: annotations: controller-gen.kubebuilder.io/version: v0.17.2 - name: vllmruntimes.serving.vllm.ai + name: vllmruntimes.production-stack.vllm.ai spec: - group: serving.vllm.ai + group: production-stack.vllm.ai names: kind: VLLMRuntime listKind: VLLMRuntimeList diff --git a/operator/config/crd/kustomization.yaml b/operator/config/crd/kustomization.yaml index 500f2b34a..b3158c4de 100644 --- a/operator/config/crd/kustomization.yaml +++ b/operator/config/crd/kustomization.yaml @@ -2,8 +2,8 @@ # since it depends on service name and namespace that are out of this kustomize package. 
# It should be run by config/default resources: -- bases/serving.vllm.ai_vllmruntimes.yaml -- bases/serving.vllm.ai_vllmrouters.yaml +- bases/production-stack.vllm.ai_vllmruntimes.yaml +- bases/production-stack.vllm.ai_vllmrouters.yaml # +kubebuilder:scaffold:crdkustomizeresource patches: diff --git a/operator/config/rbac/role.yaml b/operator/config/rbac/role.yaml index 04f6f081a..941b757ec 100644 --- a/operator/config/rbac/role.yaml +++ b/operator/config/rbac/role.yaml @@ -31,7 +31,7 @@ rules: - update - watch - apiGroups: - - serving.vllm.ai + - production-stack.vllm.ai resources: - vllmrouters - vllmruntimes @@ -44,14 +44,14 @@ rules: - update - watch - apiGroups: - - serving.vllm.ai + - production-stack.vllm.ai resources: - vllmrouters/finalizers - vllmruntimes/finalizers verbs: - update - apiGroups: - - serving.vllm.ai + - production-stack.vllm.ai resources: - vllmrouters/status - vllmruntimes/status diff --git a/operator/config/rbac/vllmrouter_admin_role.yaml b/operator/config/rbac/vllmrouter_admin_role.yaml index a42914d6b..0efbe62ae 100644 --- a/operator/config/rbac/vllmrouter_admin_role.yaml +++ b/operator/config/rbac/vllmrouter_admin_role.yaml @@ -1,7 +1,7 @@ # This rule is not used by the project production-stack itself. # It is provided to allow the cluster admin to help manage permissions for users. # -# Grants full permissions ('*') over serving.vllm.ai. +# Grants full permissions ('*') over production-stack.vllm.ai. # This role is intended for users authorized to modify roles and bindings within the cluster, # enabling them to delegate specific permissions to other users or groups as needed. 
@@ -14,14 +14,14 @@ metadata: name: vllmrouter-admin-role rules: - apiGroups: - - serving.vllm.ai + - production-stack.vllm.ai resources: - - routers + - vllmrouters verbs: - '*' - apiGroups: - - serving.vllm.ai + - production-stack.vllm.ai resources: - - routers/status + - vllmrouters/status verbs: - get diff --git a/operator/config/rbac/vllmrouter_editor_role.yaml b/operator/config/rbac/vllmrouter_editor_role.yaml index 0fbf83a34..4cef51a05 100644 --- a/operator/config/rbac/vllmrouter_editor_role.yaml +++ b/operator/config/rbac/vllmrouter_editor_role.yaml @@ -1,7 +1,7 @@ # This rule is not used by the project production-stack itself. # It is provided to allow the cluster admin to help manage permissions for users. # -# Grants permissions to create, update, and delete resources within the serving.vllm.ai. +# Grants permissions to create, update, and delete resources within the production-stack.vllm.ai. # This role is intended for users who need to manage these resources # but should not control RBAC or manage permissions for others. @@ -14,7 +14,7 @@ metadata: name: vllmrouter-editor-role rules: - apiGroups: - - serving.vllm.ai + - production-stack.vllm.ai resources: - vllmrouters verbs: @@ -26,7 +26,7 @@ rules: - update - watch - apiGroups: - - serving.vllm.ai + - production-stack.vllm.ai resources: - vllmrouters/status verbs: diff --git a/operator/config/rbac/vllmrouter_viewer_role.yaml b/operator/config/rbac/vllmrouter_viewer_role.yaml index 5da0be3d5..4451ad09d 100644 --- a/operator/config/rbac/vllmrouter_viewer_role.yaml +++ b/operator/config/rbac/vllmrouter_viewer_role.yaml @@ -1,7 +1,7 @@ # This rule is not used by the project production-stack itself. # It is provided to allow the cluster admin to help manage permissions for users. # -# Grants read-only access to serving.vllm.ai resources. +# Grants read-only access to production-stack.vllm.ai resources. 
# This role is intended for users who need visibility into these resources # without permissions to modify them. It is ideal for monitoring purposes and limited-access viewing. @@ -14,7 +14,7 @@ metadata: name: vllmrouter-viewer-role rules: - apiGroups: - - serving.vllm.ai + - production-stack.vllm.ai resources: - vllmrouters verbs: @@ -22,7 +22,7 @@ rules: - list - watch - apiGroups: - - serving.vllm.ai + - production-stack.vllm.ai resources: - vllmrouters/status verbs: diff --git a/operator/config/rbac/vllmruntime_admin_role.yaml b/operator/config/rbac/vllmruntime_admin_role.yaml index f765cb599..26e6444a9 100644 --- a/operator/config/rbac/vllmruntime_admin_role.yaml +++ b/operator/config/rbac/vllmruntime_admin_role.yaml @@ -1,7 +1,7 @@ # This rule is not used by the project production-stack itself. # It is provided to allow the cluster admin to help manage permissions for users. # -# Grants full permissions ('*') over serving.vllm.ai. +# Grants full permissions ('*') over production-stack.vllm.ai. # This role is intended for users authorized to modify roles and bindings within the cluster, # enabling them to delegate specific permissions to other users or groups as needed. @@ -14,13 +14,13 @@ metadata: name: vllmruntime-admin-role rules: - apiGroups: - - serving.vllm.ai + - production-stack.vllm.ai resources: - vllmruntimes verbs: - '*' - apiGroups: - - serving.vllm.ai + - production-stack.vllm.ai resources: - vllmruntimes/status verbs: diff --git a/operator/config/rbac/vllmruntime_editor_role.yaml b/operator/config/rbac/vllmruntime_editor_role.yaml index c62691af5..71a1972d2 100644 --- a/operator/config/rbac/vllmruntime_editor_role.yaml +++ b/operator/config/rbac/vllmruntime_editor_role.yaml @@ -1,7 +1,7 @@ # This rule is not used by the project production-stack itself. # It is provided to allow the cluster admin to help manage permissions for users. # -# Grants permissions to create, update, and delete resources within the serving.vllm.ai. 
+# Grants permissions to create, update, and delete resources within the production-stack.vllm.ai. # This role is intended for users who need to manage these resources # but should not control RBAC or manage permissions for others. @@ -14,7 +14,7 @@ metadata: name: vllmruntime-editor-role rules: - apiGroups: - - serving.vllm.ai + - production-stack.vllm.ai resources: - vllmruntimes verbs: @@ -26,7 +26,7 @@ rules: - update - watch - apiGroups: - - serving.vllm.ai + - production-stack.vllm.ai resources: - vllmruntimes/status verbs: diff --git a/operator/config/rbac/vllmruntime_viewer_role.yaml b/operator/config/rbac/vllmruntime_viewer_role.yaml index 1314e4715..6e7719f13 100644 --- a/operator/config/rbac/vllmruntime_viewer_role.yaml +++ b/operator/config/rbac/vllmruntime_viewer_role.yaml @@ -1,7 +1,7 @@ # This rule is not used by the project production-stack itself. # It is provided to allow the cluster admin to help manage permissions for users. # -# Grants read-only access to serving.vllm.ai resources. +# Grants read-only access to production-stack.vllm.ai resources. # This role is intended for users who need visibility into these resources # without permissions to modify them. It is ideal for monitoring purposes and limited-access viewing. 
@@ -14,7 +14,7 @@ metadata: name: vllmruntime-viewer-role rules: - apiGroups: - - serving.vllm.ai + - production-stack.vllm.ai resources: - vllmruntimes verbs: @@ -22,7 +22,7 @@ rules: - list - watch - apiGroups: - - serving.vllm.ai + - production-stack.vllm.ai resources: - vllmruntimes/status verbs: diff --git a/operator/config/samples/kustomization.yaml b/operator/config/samples/kustomization.yaml index 8a0f43239..3cd7d3881 100644 --- a/operator/config/samples/kustomization.yaml +++ b/operator/config/samples/kustomization.yaml @@ -3,6 +3,6 @@ kind: Kustomization ## Append samples of your project ## resources: -- serving_v1alpha1_vllmruntime.yaml -- serving_v1alpha1_router.yaml +- production-stack_v1alpha1_vllmruntime.yaml +- production-stack_v1alpha1_vllmrouter.yaml # +kubebuilder:scaffold:manifestskustomizesamples diff --git a/operator/config/samples/serving_v1alpha1_router.yaml b/operator/config/samples/serving_v1alpha1_router.yaml index 3400dace4..9d33d0329 100644 --- a/operator/config/samples/serving_v1alpha1_router.yaml +++ b/operator/config/samples/serving_v1alpha1_router.yaml @@ -1,4 +1,4 @@ -apiVersion: serving.vllm.ai/v1alpha1 +apiVersion: production-stack.vllm.ai/v1alpha1 kind: VLLMRouter metadata: labels: diff --git a/operator/config/samples/serving_v1alpha1_vllmruntime.yaml b/operator/config/samples/serving_v1alpha1_vllmruntime.yaml index 3730c482f..1804900cb 100644 --- a/operator/config/samples/serving_v1alpha1_vllmruntime.yaml +++ b/operator/config/samples/serving_v1alpha1_vllmruntime.yaml @@ -1,4 +1,4 @@ -apiVersion: serving.vllm.ai/v1alpha1 +apiVersion: production-stack.vllm.ai/v1alpha1 kind: VLLMRuntime metadata: labels: diff --git a/operator/internal/controller/vllmrouter_controller.go b/operator/internal/controller/vllmrouter_controller.go index 002f9cddb..6fc385a8a 100644 --- a/operator/internal/controller/vllmrouter_controller.go +++ b/operator/internal/controller/vllmrouter_controller.go @@ -42,10 +42,10 @@ type VLLMRouterReconciler struct { 
Scheme *runtime.Scheme } -// +kubebuilder:rbac:groups=serving.vllm.ai,resources=vllmrouters,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=serving.vllm.ai,resources=vllmrouters/status,verbs=get;update;patch -// +kubebuilder:rbac:groups=serving.vllm.ai,resources=vllmrouters/finalizers,verbs=update -// +kubebuilder:rbac:groups=serving.vllm.ai,resources=vllmruntimes,verbs=get;list;watch +// +kubebuilder:rbac:groups=production-stack.vllm.ai,resources=vllmrouters,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=production-stack.vllm.ai,resources=vllmrouters/status,verbs=get;update;patch +// +kubebuilder:rbac:groups=production-stack.vllm.ai,resources=vllmrouters/finalizers,verbs=update +// +kubebuilder:rbac:groups=production-stack.vllm.ai,resources=vllmruntimes,verbs=get;list;watch // +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update;patch;delete diff --git a/operator/internal/controller/vllmrouter_controller_test.go b/operator/internal/controller/vllmrouter_controller_test.go index fe68a9f5c..1642ad5f8 100644 --- a/operator/internal/controller/vllmrouter_controller_test.go +++ b/operator/internal/controller/vllmrouter_controller_test.go @@ -27,7 +27,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - servingv1alpha1 "production-stack/api/v1alpha1" + productionstackv1alpha1 "production-stack/api/v1alpha1" ) var _ = Describe("VLLMRouter Controller", func() { @@ -40,13 +40,13 @@ var _ = Describe("VLLMRouter Controller", func() { Name: resourceName, Namespace: "default", // TODO(user):Modify as needed } - router := &servingv1alpha1.VLLMRouter{} + router := &productionstackv1alpha1.VLLMRouter{} BeforeEach(func() { By("creating the custom resource for the Kind VLLMRouter") 
err := k8sClient.Get(ctx, typeNamespacedName, router) if err != nil && errors.IsNotFound(err) { - resource := &servingv1alpha1.VLLMRouter{ + resource := &productionstackv1alpha1.VLLMRouter{ ObjectMeta: metav1.ObjectMeta{ Name: resourceName, Namespace: "default", @@ -59,7 +59,7 @@ var _ = Describe("VLLMRouter Controller", func() { AfterEach(func() { // TODO(user): Cleanup logic after each test, like removing the resource instance. - resource := &servingv1alpha1.VLLMRouter{} + resource := &productionstackv1alpha1.VLLMRouter{} err := k8sClient.Get(ctx, typeNamespacedName, resource) Expect(err).NotTo(HaveOccurred()) diff --git a/operator/internal/controller/vllmruntime_controller.go b/operator/internal/controller/vllmruntime_controller.go index 97cb59885..b6f15f075 100644 --- a/operator/internal/controller/vllmruntime_controller.go +++ b/operator/internal/controller/vllmruntime_controller.go @@ -33,7 +33,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" - servingv1alpha1 "production-stack/api/v1alpha1" + productionstackv1alpha1 "production-stack/api/v1alpha1" ) // VLLMRuntimeReconciler reconciles a VLLMRuntime object @@ -42,9 +42,9 @@ type VLLMRuntimeReconciler struct { Scheme *runtime.Scheme } -// +kubebuilder:rbac:groups=serving.vllm.ai,resources=vllmruntimes,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=serving.vllm.ai,resources=vllmruntimes/status,verbs=get;update;patch -// +kubebuilder:rbac:groups=serving.vllm.ai,resources=vllmruntimes/finalizers,verbs=update +// +kubebuilder:rbac:groups=production-stack.vllm.ai,resources=vllmruntimes,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=production-stack.vllm.ai,resources=vllmruntimes/status,verbs=get;update;patch +// +kubebuilder:rbac:groups=production-stack.vllm.ai,resources=vllmruntimes/finalizers,verbs=update // 
+kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update;patch;delete @@ -56,7 +56,7 @@ func (r *VLLMRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Request) log := log.FromContext(ctx) // Fetch the VLLMRuntime instance - vllmRuntime := &servingv1alpha1.VLLMRuntime{} + vllmRuntime := &productionstackv1alpha1.VLLMRuntime{} err := r.Get(ctx, req.NamespacedName, vllmRuntime) if err != nil { if errors.IsNotFound(err) { @@ -148,7 +148,7 @@ func (r *VLLMRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Request) } // deploymentForVLLMRuntime returns a VLLMRuntime Deployment object -func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *servingv1alpha1.VLLMRuntime) *appsv1.Deployment { +func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *productionstackv1alpha1.VLLMRuntime) *appsv1.Deployment { labels := map[string]string{ "app": vllmRuntime.Name, } @@ -395,7 +395,7 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *servingv1a } // deploymentNeedsUpdate checks if the deployment needs to be updated -func (r *VLLMRuntimeReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, vr *servingv1alpha1.VLLMRuntime) bool { +func (r *VLLMRuntimeReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, vr *productionstackv1alpha1.VLLMRuntime) bool { // Generate the expected deployment expectedDep := r.deploymentForVLLMRuntime(vr) @@ -460,9 +460,9 @@ func (r *VLLMRuntimeReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, vr } // updateStatus updates the status of the VLLMRuntime -func (r *VLLMRuntimeReconciler) updateStatus(ctx context.Context, vr *servingv1alpha1.VLLMRuntime, dep *appsv1.Deployment) error { +func (r *VLLMRuntimeReconciler) updateStatus(ctx context.Context, vr 
*productionstackv1alpha1.VLLMRuntime, dep *appsv1.Deployment) error { // Re-read the VLLMRuntime to get the latest version - latestVR := &servingv1alpha1.VLLMRuntime{} + latestVR := &productionstackv1alpha1.VLLMRuntime{} if err := r.Get(ctx, types.NamespacedName{Name: vr.Name, Namespace: vr.Namespace}, latestVR); err != nil { return err } @@ -483,7 +483,7 @@ func (r *VLLMRuntimeReconciler) updateStatus(ctx context.Context, vr *servingv1a } // serviceForVLLMRuntime returns a VLLMRuntime Service object -func (r *VLLMRuntimeReconciler) serviceForVLLMRuntime(vllmRuntime *servingv1alpha1.VLLMRuntime) *corev1.Service { +func (r *VLLMRuntimeReconciler) serviceForVLLMRuntime(vllmRuntime *productionstackv1alpha1.VLLMRuntime) *corev1.Service { labels := map[string]string{ "app": vllmRuntime.Name, } @@ -513,7 +513,7 @@ func (r *VLLMRuntimeReconciler) serviceForVLLMRuntime(vllmRuntime *servingv1alph } // serviceNeedsUpdate checks if the service needs to be updated -func (r *VLLMRuntimeReconciler) serviceNeedsUpdate(svc *corev1.Service, vr *servingv1alpha1.VLLMRuntime) bool { +func (r *VLLMRuntimeReconciler) serviceNeedsUpdate(svc *corev1.Service, vr *productionstackv1alpha1.VLLMRuntime) bool { // Compare target port expectedTargetPort := int(vr.Spec.Port) actualTargetPort := svc.Spec.Ports[0].TargetPort.IntValue() @@ -527,7 +527,7 @@ func (r *VLLMRuntimeReconciler) serviceNeedsUpdate(svc *corev1.Service, vr *serv // SetupWithManager sets up the controller with the Manager. func (r *VLLMRuntimeReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). - For(&servingv1alpha1.VLLMRuntime{}). + For(&productionstackv1alpha1.VLLMRuntime{}). Owns(&appsv1.Deployment{}). Owns(&corev1.Service{}). 
Complete(r) diff --git a/operator/internal/controller/vllmruntime_controller_test.go b/operator/internal/controller/vllmruntime_controller_test.go index 9f07fc776..f808ada78 100644 --- a/operator/internal/controller/vllmruntime_controller_test.go +++ b/operator/internal/controller/vllmruntime_controller_test.go @@ -27,7 +27,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - servingv1alpha1 "production-stack/api/v1alpha1" + productionstackv1alpha1 "production-stack/api/v1alpha1" ) var _ = Describe("VLLMRuntime Controller", func() { @@ -40,13 +40,13 @@ var _ = Describe("VLLMRuntime Controller", func() { Name: resourceName, Namespace: "default", // TODO(user):Modify as needed } - vllmruntime := &servingv1alpha1.VLLMRuntime{} + vllmruntime := &productionstackv1alpha1.VLLMRuntime{} BeforeEach(func() { By("creating the custom resource for the Kind VLLMRuntime") err := k8sClient.Get(ctx, typeNamespacedName, vllmruntime) if err != nil && errors.IsNotFound(err) { - resource := &servingv1alpha1.VLLMRuntime{ + resource := &productionstackv1alpha1.VLLMRuntime{ ObjectMeta: metav1.ObjectMeta{ Name: resourceName, Namespace: "default", @@ -59,7 +59,7 @@ var _ = Describe("VLLMRuntime Controller", func() { AfterEach(func() { // TODO(user): Cleanup logic after each test, like removing the resource instance. - resource := &servingv1alpha1.VLLMRuntime{} + resource := &productionstackv1alpha1.VLLMRuntime{} err := k8sClient.Get(ctx, typeNamespacedName, resource) Expect(err).NotTo(HaveOccurred()) From 55aad79a4c2e1d224671e6f69de66801f0ff1336 Mon Sep 17 00:00:00 2001 From: royyhuang Date: Fri, 9 May 2025 03:31:04 +0000 Subject: [PATCH 04/14] enable lmcache cpu offloading Signed-off-by: royyhuang --- .codespell-ignore | 2 + .gitignore | 3 - operator/api/v1alpha1/vllmrouter_types.go | 4 +- .../production-stack.vllm.ai_vllmrouters.yaml | 14 +- ... 
production-stack_v1alpha_vllmrouter.yaml} | 4 +- ...production-stack_v1alpha_vllmruntime.yaml} | 6 +- operator/go.mod | 2 +- operator/internal/controller/suite_test.go | 4 +- .../controller/vllmrouter_controller.go | 44 ++- .../controller/vllmruntime_controller.go | 59 +++- operator/test/e2e/e2e_suite_test.go | 89 +++++ operator/test/e2e/e2e_test.go | 329 ++++++++++++++++++ operator/test/utils/utils.go | 251 +++++++++++++ 13 files changed, 755 insertions(+), 56 deletions(-) rename operator/config/samples/{serving_v1alpha1_router.yaml => production-stack_v1alpha_vllmrouter.yaml} (95%) rename operator/config/samples/{serving_v1alpha1_vllmruntime.yaml => production-stack_v1alpha_vllmruntime.yaml} (93%) create mode 100644 operator/test/e2e/e2e_suite_test.go create mode 100644 operator/test/e2e/e2e_test.go create mode 100644 operator/test/utils/utils.go diff --git a/.codespell-ignore b/.codespell-ignore index bd1259c69..d12a466dc 100644 --- a/.codespell-ignore +++ b/.codespell-ignore @@ -1,2 +1,4 @@ AKS aks +NotIn +AfterAll diff --git a/.gitignore b/.gitignore index b0adb89f1..1341491df 100644 --- a/.gitignore +++ b/.gitignore @@ -178,6 +178,3 @@ docs/book/src/docs /testdata/**/go.sum /docs/book/src/simple-external-plugin-tutorial/testdata/sampleexternalplugin/v1/bin /testdata/**legacy** - -# skip operator's test for now -operator/test diff --git a/operator/api/v1alpha1/vllmrouter_types.go b/operator/api/v1alpha1/vllmrouter_types.go index 2bf632949..446ee10b6 100644 --- a/operator/api/v1alpha1/vllmrouter_types.go +++ b/operator/api/v1alpha1/vllmrouter_types.go @@ -58,10 +58,10 @@ type VLLMRouterSpec struct { SessionKey string `json:"sessionKey,omitempty"` // EngineScrapeInterval for collecting engine statistics - EngineScrapeInterval string `json:"engineScrapeInterval,omitempty"` + EngineScrapeInterval int32 `json:"engineScrapeInterval,omitempty"` // RequestStatsWindow for request statistics - RequestStatsWindow string `json:"requestStatsWindow,omitempty"` + 
RequestStatsWindow int32 `json:"requestStatsWindow,omitempty"` // ExtraArgs for additional router arguments ExtraArgs []string `json:"extraArgs,omitempty"` diff --git a/operator/config/crd/bases/production-stack.vllm.ai_vllmrouters.yaml b/operator/config/crd/bases/production-stack.vllm.ai_vllmrouters.yaml index 3506964bc..8668380c3 100644 --- a/operator/config/crd/bases/production-stack.vllm.ai_vllmrouters.yaml +++ b/operator/config/crd/bases/production-stack.vllm.ai_vllmrouters.yaml @@ -45,7 +45,8 @@ spec: type: boolean engineScrapeInterval: description: EngineScrapeInterval for collecting engine statistics - type: string + format: int32 + type: integer env: description: Environment variables items: @@ -102,11 +103,11 @@ spec: operator: description: |- Represents a key's relationship to a set of values. - Valid operators are In, Not In, Exists, DoesNotExist. Gt, and Lt. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. type: string values: description: |- - An array of string values. If the operator is In or Not In, + An array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. If the operator is Gt or Lt, the values array must have a single element, which will be interpreted as an integer. @@ -135,11 +136,11 @@ spec: operator: description: |- Represents a key's relationship to a set of values. - Valid operators are In, Not In, Exists, DoesNotExist. Gt, and Lt. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. type: string values: description: |- - An array of string values. If the operator is In or Not In, + An array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. If the operator is Gt or Lt, the values array must have a single element, which will be interpreted as an integer. 
@@ -169,7 +170,8 @@ spec: type: integer requestStatsWindow: description: RequestStatsWindow for request statistics - type: string + format: int32 + type: integer resources: description: Resource requirements properties: diff --git a/operator/config/samples/serving_v1alpha1_router.yaml b/operator/config/samples/production-stack_v1alpha_vllmrouter.yaml similarity index 95% rename from operator/config/samples/serving_v1alpha1_router.yaml rename to operator/config/samples/production-stack_v1alpha_vllmrouter.yaml index 9d33d0329..c774472dd 100644 --- a/operator/config/samples/serving_v1alpha1_router.yaml +++ b/operator/config/samples/production-stack_v1alpha_vllmrouter.yaml @@ -19,10 +19,10 @@ spec: routingLogic: roundrobin # Engine statistics collection interval - engineScrapeInterval: "30" + engineScrapeInterval: 30 # Request statistics window - requestStatsWindow: "60" + requestStatsWindow: 60 # Container port for the router service port: 80 diff --git a/operator/config/samples/serving_v1alpha1_vllmruntime.yaml b/operator/config/samples/production-stack_v1alpha_vllmruntime.yaml similarity index 93% rename from operator/config/samples/serving_v1alpha1_vllmruntime.yaml rename to operator/config/samples/production-stack_v1alpha_vllmruntime.yaml index 1804900cb..0e13adbdc 100644 --- a/operator/config/samples/serving_v1alpha1_vllmruntime.yaml +++ b/operator/config/samples/production-stack_v1alpha_vllmruntime.yaml @@ -14,13 +14,13 @@ spec: gpuMemoryUtilization: "0.8" maxLoras: 4 extraArgs: ["--disable-log-requests"] - v1: false + v1: true # LM Cache configuration lmCacheConfig: enabled: true - cpuOffloadingBufferSize: "4Gi" - diskOffloadingBufferSize: "8Gi" + cpuOffloadingBufferSize: "15" + diskOffloadingBufferSize: "8" remoteUrl: "" remoteSerde: "" diff --git a/operator/go.mod b/operator/go.mod index be7288dec..34589b9a0 100644 --- a/operator/go.mod +++ b/operator/go.mod @@ -10,6 +10,7 @@ require ( k8s.io/apimachinery v0.33.0 k8s.io/client-go v0.33.0 
sigs.k8s.io/controller-runtime v0.20.4 + k8s.io/api v0.33.0 ) require ( @@ -85,7 +86,6 @@ require ( gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/api v0.33.0 // indirect k8s.io/apiextensions-apiserver v0.32.1 // indirect k8s.io/apiserver v0.32.1 // indirect k8s.io/component-base v0.32.1 // indirect diff --git a/operator/internal/controller/suite_test.go b/operator/internal/controller/suite_test.go index 13578d9d9..5cbcdfe09 100644 --- a/operator/internal/controller/suite_test.go +++ b/operator/internal/controller/suite_test.go @@ -32,7 +32,7 @@ import ( logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/log/zap" - servingv1alpha1 "production-stack/api/v1alpha1" + productionstackv1alpha1 "production-stack/api/v1alpha1" // +kubebuilder:scaffold:imports ) @@ -71,7 +71,7 @@ var _ = BeforeSuite(func() { Expect(err).NotTo(HaveOccurred()) Expect(cfg).NotTo(BeNil()) - err = servingv1alpha1.AddToScheme(scheme.Scheme) + err = productionstackv1alpha1.AddToScheme(scheme.Scheme) Expect(err).NotTo(HaveOccurred()) // +kubebuilder:scaffold:scheme diff --git a/operator/internal/controller/vllmrouter_controller.go b/operator/internal/controller/vllmrouter_controller.go index 6fc385a8a..dc99ee433 100644 --- a/operator/internal/controller/vllmrouter_controller.go +++ b/operator/internal/controller/vllmrouter_controller.go @@ -29,6 +29,7 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/client-go/util/retry" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" @@ -226,11 +227,11 @@ func (r *VLLMRouterReconciler) deploymentForVLLMRouter(router *servingv1alpha1.V if router.Spec.SessionKey != "" { args = append(args, "--session-key", router.Spec.SessionKey) } - if router.Spec.EngineScrapeInterval != "" { - args = append(args, 
"--engine-stats-interval", router.Spec.EngineScrapeInterval) + if router.Spec.EngineScrapeInterval != 0 { + args = append(args, "--engine-stats-interval", fmt.Sprintf("%d", router.Spec.EngineScrapeInterval)) } - if router.Spec.RequestStatsWindow != "" { - args = append(args, "--request-stats-window", router.Spec.RequestStatsWindow) + if router.Spec.RequestStatsWindow != 0 { + args = append(args, "--request-stats-window", fmt.Sprintf("%d", router.Spec.RequestStatsWindow)) } if router.Spec.ExtraArgs != nil { args = append(args, router.Spec.ExtraArgs...) @@ -323,24 +324,29 @@ func (r *VLLMRouterReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, rou // updateStatus updates the status of the VLLMRouter func (r *VLLMRouterReconciler) updateStatus(ctx context.Context, router *servingv1alpha1.VLLMRouter, dep *appsv1.Deployment) error { - // Re-read the VLLMRouter to get the latest version - latestRouter := &servingv1alpha1.VLLMRouter{} - if err := r.Get(ctx, types.NamespacedName{Name: router.Name, Namespace: router.Namespace}, latestRouter); err != nil { - return err - } + return retry.OnError(retry.DefaultRetry, func(err error) bool { + return errors.IsConflict(err) + }, func() error { + // Get the latest version of the VLLMRouter + latestRouter := &servingv1alpha1.VLLMRouter{} + if err := r.Get(ctx, types.NamespacedName{Name: router.Name, Namespace: router.Namespace}, latestRouter); err != nil { + return err + } - latestRouter.Status.LastUpdated = metav1.Now() + // Update the status fields + latestRouter.Status.LastUpdated = metav1.Now() - // Update VLLMRouter status based on deployment status - if dep.Status.AvailableReplicas > 0 { - latestRouter.Status.Status = "Ready" - } else if dep.Status.UpdatedReplicas > 0 { - latestRouter.Status.Status = "Updating" - } else { - latestRouter.Status.Status = "NotReady" - } + // Update VLLMRouter status based on deployment status + if dep.Status.AvailableReplicas > 0 { + latestRouter.Status.Status = "Ready" + } else if 
dep.Status.UpdatedReplicas > 0 { + latestRouter.Status.Status = "Updating" + } else { + latestRouter.Status.Status = "NotReady" + } - return r.Status().Update(ctx, latestRouter) + return r.Status().Update(ctx, latestRouter) + }) } // serviceForVLLMRouter returns a VLLMRouter Service object diff --git a/operator/internal/controller/vllmruntime_controller.go b/operator/internal/controller/vllmruntime_controller.go index b6f15f075..a4937fc59 100644 --- a/operator/internal/controller/vllmruntime_controller.go +++ b/operator/internal/controller/vllmruntime_controller.go @@ -29,6 +29,7 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/client-go/util/retry" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" @@ -246,6 +247,15 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production }, ) + // Add KV transfer config based on V1 flag + var lmcache_config string + if vllmRuntime.Spec.V1 { + lmcache_config = `{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_both"}` + } else { + lmcache_config = `{"kv_connector":"LMCacheConnector","kv_role":"kv_both"}` + } + args = append(args, "--kv-transfer-config", lmcache_config) + if vllmRuntime.Spec.LMCacheConfig.CPUOffloadingBufferSize != "" { env = append(env, corev1.EnvVar{ @@ -437,6 +447,8 @@ func (r *VLLMRuntimeReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, vr actualEnabled := false actualCPUOffloadingBufferSize := "" actualDiskOffloadingBufferSize := "" + actualRemoteURL := "" + actualRemoteSerde := "" for _, env := range actualLMCacheConfig { switch env.Name { @@ -446,13 +458,19 @@ func (r *VLLMRuntimeReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, vr actualCPUOffloadingBufferSize = env.Value case "LMCACHE_MAX_LOCAL_DISK_SIZE": actualDiskOffloadingBufferSize = env.Value + case "LMCACHE_REMOTE_URL": + actualRemoteURL = env.Value + case 
"LMCACHE_REMOTE_SERDE": + actualRemoteSerde = env.Value } } // Compare specific fields if expectedLMCacheConfig.Enabled != actualEnabled || expectedLMCacheConfig.CPUOffloadingBufferSize != actualCPUOffloadingBufferSize || - expectedLMCacheConfig.DiskOffloadingBufferSize != actualDiskOffloadingBufferSize { + expectedLMCacheConfig.DiskOffloadingBufferSize != actualDiskOffloadingBufferSize || + expectedLMCacheConfig.RemoteURL != actualRemoteURL || + expectedLMCacheConfig.RemoteSerde != actualRemoteSerde { return true } @@ -461,25 +479,30 @@ func (r *VLLMRuntimeReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, vr // updateStatus updates the status of the VLLMRuntime func (r *VLLMRuntimeReconciler) updateStatus(ctx context.Context, vr *productionstackv1alpha1.VLLMRuntime, dep *appsv1.Deployment) error { - // Re-read the VLLMRuntime to get the latest version - latestVR := &productionstackv1alpha1.VLLMRuntime{} - if err := r.Get(ctx, types.NamespacedName{Name: vr.Name, Namespace: vr.Namespace}, latestVR); err != nil { - return err - } - - latestVR.Status.LastUpdated = metav1.Now() + return retry.OnError(retry.DefaultRetry, func(err error) bool { + return errors.IsConflict(err) + }, func() error { + // Get the latest version of the VLLMRuntime + latestVR := &productionstackv1alpha1.VLLMRuntime{} + if err := r.Get(ctx, types.NamespacedName{Name: vr.Name, Namespace: vr.Namespace}, latestVR); err != nil { + return err + } - // Update model status based on deployment status - if dep.Status.AvailableReplicas > 0 { - latestVR.Status.ModelStatus = "Ready" - } else if dep.Status.UpdatedReplicas > 0 { - // If we have updated replicas but they're not yet available, mark as updating - latestVR.Status.ModelStatus = "Updating" - } else { - latestVR.Status.ModelStatus = "NotReady" - } + // Update the status fields + latestVR.Status.LastUpdated = metav1.Now() + + // Update model status based on deployment status + if dep.Status.AvailableReplicas > 0 { + 
latestVR.Status.ModelStatus = "Ready" + } else if dep.Status.UpdatedReplicas > 0 { + // If we have updated replicas but they're not yet available, mark as updating + latestVR.Status.ModelStatus = "Updating" + } else { + latestVR.Status.ModelStatus = "NotReady" + } - return r.Status().Update(ctx, latestVR) + return r.Status().Update(ctx, latestVR) + }) } // serviceForVLLMRuntime returns a VLLMRuntime Service object diff --git a/operator/test/e2e/e2e_suite_test.go b/operator/test/e2e/e2e_suite_test.go new file mode 100644 index 000000000..25caaab2a --- /dev/null +++ b/operator/test/e2e/e2e_suite_test.go @@ -0,0 +1,89 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e + +import ( + "fmt" + "os" + "os/exec" + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "production-stack/test/utils" +) + +var ( + // Optional Environment Variables: + // - CERT_MANAGER_INSTALL_SKIP=true: Skips CertManager installation during test setup. + // These variables are useful if CertManager is already installed, avoiding + // re-installation and conflicts. + skipCertManagerInstall = os.Getenv("CERT_MANAGER_INSTALL_SKIP") == "true" + // isCertManagerAlreadyInstalled will be set true when CertManager CRDs be found on the cluster + isCertManagerAlreadyInstalled = false + + // projectImage is the name of the image which will be build and loaded + // with the code source changes to be tested. 
+ projectImage = "example.com/production-stack:v0.0.1" +) + +// TestE2E runs the end-to-end (e2e) test suite for the project. These tests execute in an isolated, +// temporary environment to validate project changes and are intended to be used in CI jobs. +// The default setup requires Kind, builds/loads the Manager Docker image locally, and installs +// CertManager. +func TestE2E(t *testing.T) { + RegisterFailHandler(Fail) + _, _ = fmt.Fprintf(GinkgoWriter, "Starting production-stack integration test suite\n") + RunSpecs(t, "e2e suite") +} + +var _ = BeforeSuite(func() { + By("building the manager(Operator) image") + cmd := exec.Command("make", "docker-build", fmt.Sprintf("IMG=%s", projectImage)) + _, err := utils.Run(cmd) + ExpectWithOffset(1, err).NotTo(HaveOccurred(), "Failed to build the manager(Operator) image") + + // TODO(user): If you want to change the e2e test vendor from Kind, ensure the image is + // built and available before running the tests. Also, remove the following block. + By("loading the manager(Operator) image on Kind") + err = utils.LoadImageToKindClusterWithName(projectImage) + ExpectWithOffset(1, err).NotTo(HaveOccurred(), "Failed to load the manager(Operator) image into Kind") + + // The e2e tests are intended to run on a temporary cluster that is created and destroyed for testing. + // To prevent errors when tests run in environments with CertManager already installed, + // we check for its presence before execution. + // Setup CertManager before the suite if not skipped and if not already installed + if !skipCertManagerInstall { + By("checking if cert manager is installed already") + isCertManagerAlreadyInstalled = utils.IsCertManagerCRDsInstalled() + if !isCertManagerAlreadyInstalled { + _, _ = fmt.Fprintf(GinkgoWriter, "Installing CertManager...\n") + Expect(utils.InstallCertManager()).To(Succeed(), "Failed to install CertManager") + } else { + _, _ = fmt.Fprintf(GinkgoWriter, "WARNING: CertManager is already installed.
Skipping installation...\n") + } + } +}) + +var _ = AfterSuite(func() { + // Teardown CertManager after the suite if not skipped and if it was not already installed + if !skipCertManagerInstall && !isCertManagerAlreadyInstalled { + _, _ = fmt.Fprintf(GinkgoWriter, "Uninstalling CertManager...\n") + utils.UninstallCertManager() + } +}) diff --git a/operator/test/e2e/e2e_test.go b/operator/test/e2e/e2e_test.go new file mode 100644 index 000000000..e2c353d7f --- /dev/null +++ b/operator/test/e2e/e2e_test.go @@ -0,0 +1,329 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e + +import ( + "encoding/json" + "fmt" + "os" + "os/exec" + "path/filepath" + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + "production-stack/test/utils" +) + +// namespace where the project is deployed in +const namespace = "production-stack-system" + +// serviceAccountName created for the project +const serviceAccountName = "production-stack-controller-manager" + +// metricsServiceName is the name of the metrics service of the project +const metricsServiceName = "production-stack-controller-manager-metrics-service" + +// metricsRoleBindingName is the name of the RBAC that will be created to allow get the metrics data +const metricsRoleBindingName = "production-stack-metrics-binding" + +var _ = Describe("Manager", Ordered, func() { + var controllerPodName string + + // Before running the tests, set up the environment by creating the namespace, + // enforce the restricted security policy to the namespace, installing CRDs, + // and deploying the controller. + BeforeAll(func() { + By("creating manager namespace") + cmd := exec.Command("kubectl", "create", "ns", namespace) + _, err := utils.Run(cmd) + Expect(err).NotTo(HaveOccurred(), "Failed to create namespace") + + By("labeling the namespace to enforce the restricted security policy") + cmd = exec.Command("kubectl", "label", "--overwrite", "ns", namespace, + "pod-security.kubernetes.io/enforce=restricted") + _, err = utils.Run(cmd) + Expect(err).NotTo(HaveOccurred(), "Failed to label namespace with restricted policy") + + By("installing CRDs") + cmd = exec.Command("make", "install") + _, err = utils.Run(cmd) + Expect(err).NotTo(HaveOccurred(), "Failed to install CRDs") + + By("deploying the controller-manager") + cmd = exec.Command("make", "deploy", fmt.Sprintf("IMG=%s", projectImage)) + _, err = utils.Run(cmd) + Expect(err).NotTo(HaveOccurred(), "Failed to deploy the controller-manager") + }) + + // After all tests have been executed, clean up by undeploying the controller, uninstalling CRDs, + // and deleting the namespace. 
+ AfterAll(func() { + By("cleaning up the curl pod for metrics") + cmd := exec.Command("kubectl", "delete", "pod", "curl-metrics", "-n", namespace) + _, _ = utils.Run(cmd) + + By("undeploying the controller-manager") + cmd = exec.Command("make", "undeploy") + _, _ = utils.Run(cmd) + + By("uninstalling CRDs") + cmd = exec.Command("make", "uninstall") + _, _ = utils.Run(cmd) + + By("removing manager namespace") + cmd = exec.Command("kubectl", "delete", "ns", namespace) + _, _ = utils.Run(cmd) + }) + + // After each test, check for failures and collect logs, events, + // and pod descriptions for debugging. + AfterEach(func() { + specReport := CurrentSpecReport() + if specReport.Failed() { + By("Fetching controller manager pod logs") + cmd := exec.Command("kubectl", "logs", controllerPodName, "-n", namespace) + controllerLogs, err := utils.Run(cmd) + if err == nil { + _, _ = fmt.Fprintf(GinkgoWriter, "Controller logs:\n %s", controllerLogs) + } else { + _, _ = fmt.Fprintf(GinkgoWriter, "Failed to get Controller logs: %s", err) + } + + By("Fetching Kubernetes events") + cmd = exec.Command("kubectl", "get", "events", "-n", namespace, "--sort-by=.lastTimestamp") + eventsOutput, err := utils.Run(cmd) + if err == nil { + _, _ = fmt.Fprintf(GinkgoWriter, "Kubernetes events:\n%s", eventsOutput) + } else { + _, _ = fmt.Fprintf(GinkgoWriter, "Failed to get Kubernetes events: %s", err) + } + + By("Fetching curl-metrics logs") + cmd = exec.Command("kubectl", "logs", "curl-metrics", "-n", namespace) + metricsOutput, err := utils.Run(cmd) + if err == nil { + _, _ = fmt.Fprintf(GinkgoWriter, "Metrics logs:\n %s", metricsOutput) + } else { + _, _ = fmt.Fprintf(GinkgoWriter, "Failed to get curl-metrics logs: %s", err) + } + + By("Fetching controller manager pod description") + cmd = exec.Command("kubectl", "describe", "pod", controllerPodName, "-n", namespace) + podDescription, err := utils.Run(cmd) + if err == nil { + fmt.Println("Pod description:\n", podDescription) + } else { + 
fmt.Println("Failed to describe controller pod") + } + } + }) + + SetDefaultEventuallyTimeout(2 * time.Minute) + SetDefaultEventuallyPollingInterval(time.Second) + + Context("Manager", func() { + It("should run successfully", func() { + By("validating that the controller-manager pod is running as expected") + verifyControllerUp := func(g Gomega) { + // Get the name of the controller-manager pod + cmd := exec.Command("kubectl", "get", + "pods", "-l", "control-plane=controller-manager", + "-o", "go-template={{ range .items }}"+ + "{{ if not .metadata.deletionTimestamp }}"+ + "{{ .metadata.name }}"+ + "{{ \"\\n\" }}{{ end }}{{ end }}", + "-n", namespace, + ) + + podOutput, err := utils.Run(cmd) + g.Expect(err).NotTo(HaveOccurred(), "Failed to retrieve controller-manager pod information") + podNames := utils.GetNonEmptyLines(podOutput) + g.Expect(podNames).To(HaveLen(1), "expected 1 controller pod running") + controllerPodName = podNames[0] + g.Expect(controllerPodName).To(ContainSubstring("controller-manager")) + + // Validate the pod's status + cmd = exec.Command("kubectl", "get", + "pods", controllerPodName, "-o", "jsonpath={.status.phase}", + "-n", namespace, + ) + output, err := utils.Run(cmd) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(output).To(Equal("Running"), "Incorrect controller-manager pod status") + } + Eventually(verifyControllerUp).Should(Succeed()) + }) + + It("should ensure the metrics endpoint is serving metrics", func() { + By("creating a ClusterRoleBinding for the service account to allow access to metrics") + cmd := exec.Command("kubectl", "create", "clusterrolebinding", metricsRoleBindingName, + "--clusterrole=production-stack-metrics-reader", + fmt.Sprintf("--serviceaccount=%s:%s", namespace, serviceAccountName), + ) + _, err := utils.Run(cmd) + Expect(err).NotTo(HaveOccurred(), "Failed to create ClusterRoleBinding") + + By("validating that the metrics service is available") + cmd = exec.Command("kubectl", "get", "service", 
metricsServiceName, "-n", namespace) + _, err = utils.Run(cmd) + Expect(err).NotTo(HaveOccurred(), "Metrics service should exist") + + By("getting the service account token") + token, err := serviceAccountToken() + Expect(err).NotTo(HaveOccurred()) + Expect(token).NotTo(BeEmpty()) + + By("waiting for the metrics endpoint to be ready") + verifyMetricsEndpointReady := func(g Gomega) { + cmd := exec.Command("kubectl", "get", "endpoints", metricsServiceName, "-n", namespace) + output, err := utils.Run(cmd) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(output).To(ContainSubstring("8443"), "Metrics endpoint is not ready") + } + Eventually(verifyMetricsEndpointReady).Should(Succeed()) + + By("verifying that the controller manager is serving the metrics server") + verifyMetricsServerStarted := func(g Gomega) { + cmd := exec.Command("kubectl", "logs", controllerPodName, "-n", namespace) + output, err := utils.Run(cmd) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(output).To(ContainSubstring("controller-runtime.metrics\tServing metrics server"), + "Metrics server not yet started") + } + Eventually(verifyMetricsServerStarted).Should(Succeed()) + + By("creating the curl-metrics pod to access the metrics endpoint") + cmd = exec.Command("kubectl", "run", "curl-metrics", "--restart=Never", + "--namespace", namespace, + "--image=curlimages/curl:latest", + "--overrides", + fmt.Sprintf(`{ + "spec": { + "containers": [{ + "name": "curl", + "image": "curlimages/curl:latest", + "command": ["/bin/sh", "-c"], + "args": ["curl -v -k -H 'Authorization: Bearer %s' https://%s.%s.svc.cluster.local:8443/metrics"], + "securityContext": { + "allowPrivilegeEscalation": false, + "capabilities": { + "drop": ["ALL"] + }, + "runAsNonRoot": true, + "runAsUser": 1000, + "seccompProfile": { + "type": "RuntimeDefault" + } + } + }], + "serviceAccount": "%s" + } + }`, token, metricsServiceName, namespace, serviceAccountName)) + _, err = utils.Run(cmd) + Expect(err).NotTo(HaveOccurred(), "Failed to 
create curl-metrics pod") + + By("waiting for the curl-metrics pod to complete.") + verifyCurlUp := func(g Gomega) { + cmd := exec.Command("kubectl", "get", "pods", "curl-metrics", + "-o", "jsonpath={.status.phase}", + "-n", namespace) + output, err := utils.Run(cmd) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(output).To(Equal("Succeeded"), "curl pod in wrong status") + } + Eventually(verifyCurlUp, 5*time.Minute).Should(Succeed()) + + By("getting the metrics by checking curl-metrics logs") + metricsOutput := getMetricsOutput() + Expect(metricsOutput).To(ContainSubstring( + "controller_runtime_reconcile_total", + )) + }) + + // +kubebuilder:scaffold:e2e-webhooks-checks + + // TODO: Customize the e2e test suite with scenarios specific to your project. + // Consider applying sample/CR(s) and check their status and/or verifying + // the reconciliation by using the metrics, i.e.: + // metricsOutput := getMetricsOutput() + // Expect(metricsOutput).To(ContainSubstring( + // fmt.Sprintf(`controller_runtime_reconcile_total{controller="%s",result="success"} 1`, + // strings.ToLower(), + // )) + }) +}) + +// serviceAccountToken returns a token for the specified service account in the given namespace. +// It uses the Kubernetes TokenRequest API to generate a token by directly sending a request +// and parsing the resulting token from the API response. 
+func serviceAccountToken() (string, error) { + const tokenRequestRawString = `{ + "apiVersion": "authentication.k8s.io/v1", + "kind": "TokenRequest" + }` + + // Temporary file to store the token request + secretName := fmt.Sprintf("%s-token-request", serviceAccountName) + tokenRequestFile := filepath.Join("/tmp", secretName) + err := os.WriteFile(tokenRequestFile, []byte(tokenRequestRawString), os.FileMode(0o644)) + if err != nil { + return "", err + } + + var out string + verifyTokenCreation := func(g Gomega) { + // Execute kubectl command to create the token + cmd := exec.Command("kubectl", "create", "--raw", fmt.Sprintf( + "/api/v1/namespaces/%s/serviceaccounts/%s/token", + namespace, + serviceAccountName, + ), "-f", tokenRequestFile) + + output, err := cmd.CombinedOutput() + g.Expect(err).NotTo(HaveOccurred()) + + // Parse the JSON output to extract the token + var token tokenRequest + err = json.Unmarshal(output, &token) + g.Expect(err).NotTo(HaveOccurred()) + + out = token.Status.Token + } + Eventually(verifyTokenCreation).Should(Succeed()) + + return out, err +} + +// getMetricsOutput retrieves and returns the logs from the curl pod used to access the metrics endpoint. +func getMetricsOutput() string { + By("getting the curl-metrics logs") + cmd := exec.Command("kubectl", "logs", "curl-metrics", "-n", namespace) + metricsOutput, err := utils.Run(cmd) + Expect(err).NotTo(HaveOccurred(), "Failed to retrieve logs from curl pod") + Expect(metricsOutput).To(ContainSubstring("< HTTP/1.1 200 OK")) + return metricsOutput +} + +// tokenRequest is a simplified representation of the Kubernetes TokenRequest API response, +// containing only the token field that we need to extract. 
+type tokenRequest struct { + Status struct { + Token string `json:"token"` + } `json:"status"` +} diff --git a/operator/test/utils/utils.go b/operator/test/utils/utils.go new file mode 100644 index 000000000..04a5141cc --- /dev/null +++ b/operator/test/utils/utils.go @@ -0,0 +1,251 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package utils + +import ( + "bufio" + "bytes" + "fmt" + "os" + "os/exec" + "strings" + + . "github.com/onsi/ginkgo/v2" //nolint:golint,revive +) + +const ( + prometheusOperatorVersion = "v0.77.1" + prometheusOperatorURL = "https://github.com/prometheus-operator/prometheus-operator/" + + "releases/download/%s/bundle.yaml" + + certmanagerVersion = "v1.16.3" + certmanagerURLTmpl = "https://github.com/cert-manager/cert-manager/releases/download/%s/cert-manager.yaml" +) + +func warnError(err error) { + _, _ = fmt.Fprintf(GinkgoWriter, "warning: %v\n", err) +} + +// Run executes the provided command within this context +func Run(cmd *exec.Cmd) (string, error) { + dir, _ := GetProjectDir() + cmd.Dir = dir + + if err := os.Chdir(cmd.Dir); err != nil { + _, _ = fmt.Fprintf(GinkgoWriter, "chdir dir: %s\n", err) + } + + cmd.Env = append(os.Environ(), "GO111MODULE=on") + command := strings.Join(cmd.Args, " ") + _, _ = fmt.Fprintf(GinkgoWriter, "running: %s\n", command) + output, err := cmd.CombinedOutput() + if err != nil { + return string(output), fmt.Errorf("%s failed with error: (%v) %s", command, err, string(output)) + } + + return 
string(output), nil +} + +// InstallPrometheusOperator installs the Prometheus Operator to be used to export the enabled metrics. +func InstallPrometheusOperator() error { + url := fmt.Sprintf(prometheusOperatorURL, prometheusOperatorVersion) + cmd := exec.Command("kubectl", "create", "-f", url) + _, err := Run(cmd) + return err +} + +// UninstallPrometheusOperator uninstalls the Prometheus Operator +func UninstallPrometheusOperator() { + url := fmt.Sprintf(prometheusOperatorURL, prometheusOperatorVersion) + cmd := exec.Command("kubectl", "delete", "-f", url) + if _, err := Run(cmd); err != nil { + warnError(err) + } +} + +// IsPrometheusCRDsInstalled checks if any Prometheus CRDs are installed +// by verifying the existence of key CRDs related to Prometheus. +func IsPrometheusCRDsInstalled() bool { + // List of common Prometheus CRDs + prometheusCRDs := []string{ + "prometheuses.monitoring.coreos.com", + "prometheusrules.monitoring.coreos.com", + "prometheusagents.monitoring.coreos.com", + } + + cmd := exec.Command("kubectl", "get", "crds", "-o", "custom-columns=NAME:.metadata.name") + output, err := Run(cmd) + if err != nil { + return false + } + crdList := GetNonEmptyLines(output) + for _, crd := range prometheusCRDs { + for _, line := range crdList { + if strings.Contains(line, crd) { + return true + } + } + } + + return false +} + +// UninstallCertManager uninstalls the cert manager +func UninstallCertManager() { + url := fmt.Sprintf(certmanagerURLTmpl, certmanagerVersion) + cmd := exec.Command("kubectl", "delete", "-f", url) + if _, err := Run(cmd); err != nil { + warnError(err) + } +} + +// InstallCertManager installs the cert manager bundle.
+func InstallCertManager() error { + url := fmt.Sprintf(certmanagerURLTmpl, certmanagerVersion) + cmd := exec.Command("kubectl", "apply", "-f", url) + if _, err := Run(cmd); err != nil { + return err + } + // Wait for cert-manager-webhook to be ready, which can take time if cert-manager + // was re-installed after uninstalling on a cluster. + cmd = exec.Command("kubectl", "wait", "deployment.apps/cert-manager-webhook", + "--for", "condition=Available", + "--namespace", "cert-manager", + "--timeout", "5m", + ) + + _, err := Run(cmd) + return err +} + +// IsCertManagerCRDsInstalled checks if any Cert Manager CRDs are installed +// by verifying the existence of key CRDs related to Cert Manager. +func IsCertManagerCRDsInstalled() bool { + // List of common Cert Manager CRDs + certManagerCRDs := []string{ + "certificates.cert-manager.io", + "issuers.cert-manager.io", + "clusterissuers.cert-manager.io", + "certificaterequests.cert-manager.io", + "orders.acme.cert-manager.io", + "challenges.acme.cert-manager.io", + } + + // Execute the kubectl command to get all CRDs + cmd := exec.Command("kubectl", "get", "crds") + output, err := Run(cmd) + if err != nil { + return false + } + + // Check if any of the Cert Manager CRDs are present + crdList := GetNonEmptyLines(output) + for _, crd := range certManagerCRDs { + for _, line := range crdList { + if strings.Contains(line, crd) { + return true + } + } + } + + return false +} + +// LoadImageToKindClusterWithName loads a local docker image to the kind cluster +func LoadImageToKindClusterWithName(name string) error { + cluster := "kind" + if v, ok := os.LookupEnv("KIND_CLUSTER"); ok { + cluster = v + } + kindOptions := []string{"load", "docker-image", name, "--name", cluster} + cmd := exec.Command("kind", kindOptions...) + _, err := Run(cmd) + return err +} + +// GetNonEmptyLines splits the given command output string into individual lines +// at line breaks, and ignores the empty elements in it.
+func GetNonEmptyLines(output string) []string { + var res []string + elements := strings.Split(output, "\n") + for _, element := range elements { + if element != "" { + res = append(res, element) + } + } + + return res +} + +// GetProjectDir returns the working directory with any "/test/e2e" path segment removed +func GetProjectDir() (string, error) { + wd, err := os.Getwd() + if err != nil { + return wd, err + } + wd = strings.Replace(wd, "/test/e2e", "", -1) + return wd, nil +} + +// UncommentCode searches for target in the file and removes the comment prefix +// from the target content. The target content may span multiple lines. +func UncommentCode(filename, target, prefix string) error { + // false positive + // nolint:gosec + content, err := os.ReadFile(filename) + if err != nil { + return err + } + strContent := string(content) + + idx := strings.Index(strContent, target) + if idx < 0 { + return fmt.Errorf("unable to find the code %s to be uncomment", target) + } + + out := new(bytes.Buffer) + _, err = out.Write(content[:idx]) + if err != nil { + return err + } + + scanner := bufio.NewScanner(bytes.NewBufferString(target)) + if !scanner.Scan() { + return nil + } + for { + _, err := out.WriteString(strings.TrimPrefix(scanner.Text(), prefix)) + if err != nil { + return err + } + // Avoid writing a newline in case the previous line was the last in target.
+ if !scanner.Scan() { + break + } + if _, err := out.WriteString("\n"); err != nil { + return err + } + } + + _, err = out.Write(content[idx+len(target):]) + if err != nil { + return err + } + // false positive + // nolint:gosec + return os.WriteFile(filename, out.Bytes(), 0644) +} From 46713c3f4e2d287fd8c0768b8a5f2ee0a8389aae Mon Sep 17 00:00:00 2001 From: royyhuang Date: Fri, 9 May 2025 06:03:05 +0000 Subject: [PATCH 05/14] enable lmcache remote cache server offloading Signed-off-by: royyhuang --- operator/PROJECT | 9 + operator/api/v1alpha1/cacheserver_types.go | 82 +++++ operator/api/v1alpha1/common.go | 32 ++ operator/api/v1alpha1/vllmruntime_types.go | 15 - .../api/v1alpha1/zz_generated.deepcopy.go | 92 ++++++ operator/cmd/main.go | 7 + ...production-stack.vllm.ai_cacheservers.yaml | 113 +++++++ operator/config/crd/kustomization.yaml | 1 + .../config/rbac/cacheserver_admin_role.yaml | 27 ++ .../config/rbac/cacheserver_editor_role.yaml | 33 ++ .../config/rbac/cacheserver_viewer_role.yaml | 29 ++ operator/config/rbac/kustomization.yaml | 3 + operator/config/rbac/role.yaml | 3 + operator/config/samples/kustomization.yaml | 1 + ...production-stack_v1alpha1_cacheserver.yaml | 28 ++ .../production-stack_v1alpha_vllmruntime.yaml | 4 +- operator/go.mod | 2 +- .../controller/cacheserver_controller.go | 289 ++++++++++++++++++ .../controller/cacheserver_controller_test.go | 84 +++++ 19 files changed, 836 insertions(+), 18 deletions(-) create mode 100644 operator/api/v1alpha1/cacheserver_types.go create mode 100644 operator/api/v1alpha1/common.go create mode 100644 operator/config/crd/bases/production-stack.vllm.ai_cacheservers.yaml create mode 100644 operator/config/rbac/cacheserver_admin_role.yaml create mode 100644 operator/config/rbac/cacheserver_editor_role.yaml create mode 100644 operator/config/rbac/cacheserver_viewer_role.yaml create mode 100644 operator/config/samples/production-stack_v1alpha1_cacheserver.yaml create mode 100644 
operator/internal/controller/cacheserver_controller.go create mode 100644 operator/internal/controller/cacheserver_controller_test.go diff --git a/operator/PROJECT b/operator/PROJECT index 696ec22c5..f59091172 100644 --- a/operator/PROJECT +++ b/operator/PROJECT @@ -26,4 +26,13 @@ resources: kind: VLLMRouter path: production-stack/api/v1alpha1 version: v1alpha1 +- api: + crdVersion: v1 + namespaced: true + controller: true + domain: vllm.ai + group: production-stack + kind: CacheServer + path: production-stack/api/v1alpha1 + version: v1alpha1 version: "3" diff --git a/operator/api/v1alpha1/cacheserver_types.go b/operator/api/v1alpha1/cacheserver_types.go new file mode 100644 index 000000000..08c44f139 --- /dev/null +++ b/operator/api/v1alpha1/cacheserver_types.go @@ -0,0 +1,82 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! +// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. 
+ +// CacheServerSpec defines the desired state of CacheServer +type CacheServerSpec struct { + // Image configuration for the cache server + Image ImageSpec `json:"image"` + + // Container port for the cache server + // +kubebuilder:default=8000 + Port int32 `json:"port"` + + // Resource requirements + Resources ResourceRequirements `json:"resources"` + + // Number of replicas + // +kubebuilder:default=1 + Replicas int32 `json:"replicas"` + + // Deployment strategy + // +kubebuilder:validation:Enum=RollingUpdate;Recreate + // +kubebuilder:default=RollingUpdate + DeploymentStrategy string `json:"deploymentStrategy"` +} + +// CacheServerStatus defines the observed state of CacheServer +type CacheServerStatus struct { + // Last time the status was updated + LastUpdated metav1.Time `json:"lastUpdated,omitempty"` + + // Current status of the cache server + Status string `json:"status,omitempty"` +} + +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +kubebuilder:printcolumn:name="Status",type="string",JSONPath=".status.status" +// +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp" + +// CacheServer is the Schema for the cacheservers API +type CacheServer struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec CacheServerSpec `json:"spec,omitempty"` + Status CacheServerStatus `json:"status,omitempty"` +} + +// +kubebuilder:object:root=true + +// CacheServerList contains a list of CacheServer +type CacheServerList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []CacheServer `json:"items"` +} + +func init() { + SchemeBuilder.Register(&CacheServer{}, &CacheServerList{}) +} diff --git a/operator/api/v1alpha1/common.go b/operator/api/v1alpha1/common.go new file mode 100644 index 000000000..065d9c689 --- /dev/null +++ b/operator/api/v1alpha1/common.go @@ -0,0 +1,32 @@ +/* +Copyright 2024. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +// ResourceRequirements defines the resource requirements +type ResourceRequirements struct { + CPU string `json:"cpu,omitempty"` + Memory string `json:"memory,omitempty"` + GPU string `json:"gpu,omitempty"` +} + +// ImageSpec defines the container image configuration +type ImageSpec struct { + Registry string `json:"registry"` + Name string `json:"name"` + PullPolicy string `json:"pullPolicy,omitempty"` + PullSecretName string `json:"pullSecretName,omitempty"` +} diff --git a/operator/api/v1alpha1/vllmruntime_types.go b/operator/api/v1alpha1/vllmruntime_types.go index 117a77463..1ce19c2b7 100644 --- a/operator/api/v1alpha1/vllmruntime_types.go +++ b/operator/api/v1alpha1/vllmruntime_types.go @@ -133,21 +133,6 @@ type EnvVar struct { Value string `json:"value"` } -// ResourceRequirements defines the resource requirements -type ResourceRequirements struct { - CPU string `json:"cpu,omitempty"` - Memory string `json:"memory,omitempty"` - GPU string `json:"gpu,omitempty"` -} - -// ImageSpec defines the container image configuration -type ImageSpec struct { - Registry string `json:"registry"` - Name string `json:"name"` - PullPolicy string `json:"pullPolicy,omitempty"` - PullSecretName string `json:"pullSecretName,omitempty"` -} - // VLLMRuntimeStatus defines the observed state of VLLMRuntime type VLLMRuntimeStatus struct { // Model status diff --git a/operator/api/v1alpha1/zz_generated.deepcopy.go 
b/operator/api/v1alpha1/zz_generated.deepcopy.go index 51e91ba0a..21bbc16e8 100644 --- a/operator/api/v1alpha1/zz_generated.deepcopy.go +++ b/operator/api/v1alpha1/zz_generated.deepcopy.go @@ -25,6 +25,98 @@ import ( runtime "k8s.io/apimachinery/pkg/runtime" ) +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CacheServer) DeepCopyInto(out *CacheServer) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + out.Spec = in.Spec + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CacheServer. +func (in *CacheServer) DeepCopy() *CacheServer { + if in == nil { + return nil + } + out := new(CacheServer) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *CacheServer) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CacheServerList) DeepCopyInto(out *CacheServerList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]CacheServer, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CacheServerList. +func (in *CacheServerList) DeepCopy() *CacheServerList { + if in == nil { + return nil + } + out := new(CacheServerList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
+func (in *CacheServerList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CacheServerSpec) DeepCopyInto(out *CacheServerSpec) { + *out = *in + out.Image = in.Image + out.Resources = in.Resources +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CacheServerSpec. +func (in *CacheServerSpec) DeepCopy() *CacheServerSpec { + if in == nil { + return nil + } + out := new(CacheServerSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CacheServerStatus) DeepCopyInto(out *CacheServerStatus) { + *out = *in + in.LastUpdated.DeepCopyInto(&out.LastUpdated) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CacheServerStatus. +func (in *CacheServerStatus) DeepCopy() *CacheServerStatus { + if in == nil { + return nil + } + out := new(CacheServerStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *EnvVar) DeepCopyInto(out *EnvVar) { *out = *in diff --git a/operator/cmd/main.go b/operator/cmd/main.go index 1a32cd700..08c7e291b 100644 --- a/operator/cmd/main.go +++ b/operator/cmd/main.go @@ -217,6 +217,13 @@ func main() { setupLog.Error(err, "unable to create controller", "controller", "VLLMRuntime") os.Exit(1) } + if err = (&controller.CacheServerReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "CacheServer") + os.Exit(1) + } // +kubebuilder:scaffold:builder if metricsCertWatcher != nil { diff --git a/operator/config/crd/bases/production-stack.vllm.ai_cacheservers.yaml b/operator/config/crd/bases/production-stack.vllm.ai_cacheservers.yaml new file mode 100644 index 000000000..77f44149e --- /dev/null +++ b/operator/config/crd/bases/production-stack.vllm.ai_cacheservers.yaml @@ -0,0 +1,113 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.17.2 + name: cacheservers.production-stack.vllm.ai +spec: + group: production-stack.vllm.ai + names: + kind: CacheServer + listKind: CacheServerList + plural: cacheservers + singular: cacheserver + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .status.status + name: Status + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: CacheServer is the Schema for the cacheservers API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: CacheServerSpec defines the desired state of CacheServer + properties: + deploymentStrategy: + default: RollingUpdate + description: Deployment strategy + enum: + - RollingUpdate + - Recreate + type: string + image: + description: Image configuration for the cache server + properties: + name: + type: string + pullPolicy: + type: string + pullSecretName: + type: string + registry: + type: string + required: + - name + - registry + type: object + port: + default: 8000 + description: Container port for the cache server + format: int32 + type: integer + replicas: + default: 1 + description: Number of replicas + format: int32 + type: integer + resources: + description: Resource requirements + properties: + cpu: + type: string + gpu: + type: string + memory: + type: string + type: object + required: + - deploymentStrategy + - image + - port + - replicas + - resources + type: object + status: + description: CacheServerStatus defines the observed state of CacheServer + properties: + lastUpdated: + description: Last time the status was updated + format: date-time + type: string + status: + description: Current status of the cache server + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/operator/config/crd/kustomization.yaml b/operator/config/crd/kustomization.yaml index b3158c4de..560dffa30 100644 --- a/operator/config/crd/kustomization.yaml +++ b/operator/config/crd/kustomization.yaml @@ -4,6 +4,7 @@ 
resources: - bases/production-stack.vllm.ai_vllmruntimes.yaml - bases/production-stack.vllm.ai_vllmrouters.yaml +- bases/production-stack.vllm.ai_cacheservers.yaml # +kubebuilder:scaffold:crdkustomizeresource patches: diff --git a/operator/config/rbac/cacheserver_admin_role.yaml b/operator/config/rbac/cacheserver_admin_role.yaml new file mode 100644 index 000000000..fd462e309 --- /dev/null +++ b/operator/config/rbac/cacheserver_admin_role.yaml @@ -0,0 +1,27 @@ +# This rule is not used by the project production-stack itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants full permissions ('*') over production-stack.vllm.ai. +# This role is intended for users authorized to modify roles and bindings within the cluster, +# enabling them to delegate specific permissions to other users or groups as needed. + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: production-stack + app.kubernetes.io/managed-by: kustomize + name: cacheserver-admin-role +rules: +- apiGroups: + - production-stack.vllm.ai + resources: + - cacheservers + verbs: + - '*' +- apiGroups: + - production-stack.vllm.ai + resources: + - cacheservers/status + verbs: + - get diff --git a/operator/config/rbac/cacheserver_editor_role.yaml b/operator/config/rbac/cacheserver_editor_role.yaml new file mode 100644 index 000000000..2291c7c60 --- /dev/null +++ b/operator/config/rbac/cacheserver_editor_role.yaml @@ -0,0 +1,33 @@ +# This rule is not used by the project production-stack itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants permissions to create, update, and delete resources within the production-stack.vllm.ai. +# This role is intended for users who need to manage these resources +# but should not control RBAC or manage permissions for others. 
+ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: production-stack + app.kubernetes.io/managed-by: kustomize + name: cacheserver-editor-role +rules: +- apiGroups: + - production-stack.vllm.ai + resources: + - cacheservers + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - production-stack.vllm.ai + resources: + - cacheservers/status + verbs: + - get diff --git a/operator/config/rbac/cacheserver_viewer_role.yaml b/operator/config/rbac/cacheserver_viewer_role.yaml new file mode 100644 index 000000000..877a05a4c --- /dev/null +++ b/operator/config/rbac/cacheserver_viewer_role.yaml @@ -0,0 +1,29 @@ +# This rule is not used by the project production-stack itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants read-only access to production-stack.vllm.ai resources. +# This role is intended for users who need visibility into these resources +# without permissions to modify them. It is ideal for monitoring purposes and limited-access viewing. + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: production-stack + app.kubernetes.io/managed-by: kustomize + name: cacheserver-viewer-role +rules: +- apiGroups: + - production-stack.vllm.ai + resources: + - cacheservers + verbs: + - get + - list + - watch +- apiGroups: + - production-stack.vllm.ai + resources: + - cacheservers/status + verbs: + - get diff --git a/operator/config/rbac/kustomization.yaml b/operator/config/rbac/kustomization.yaml index 7d46d2f99..e47dad1b5 100644 --- a/operator/config/rbac/kustomization.yaml +++ b/operator/config/rbac/kustomization.yaml @@ -22,6 +22,9 @@ resources: # default, aiding admins in cluster management. Those roles are # not used by the {{ .ProjectName }} itself. You can comment the following lines # if you do not want those helpers be installed with your Project. 
+- cacheserver_admin_role.yaml +- cacheserver_editor_role.yaml +- cacheserver_viewer_role.yaml - router_admin_role.yaml - router_editor_role.yaml - router_viewer_role.yaml diff --git a/operator/config/rbac/role.yaml b/operator/config/rbac/role.yaml index 941b757ec..10a45fe92 100644 --- a/operator/config/rbac/role.yaml +++ b/operator/config/rbac/role.yaml @@ -33,6 +33,7 @@ rules: - apiGroups: - production-stack.vllm.ai resources: + - cacheservers - vllmrouters - vllmruntimes verbs: @@ -46,6 +47,7 @@ rules: - apiGroups: - production-stack.vllm.ai resources: + - cacheservers/finalizers - vllmrouters/finalizers - vllmruntimes/finalizers verbs: @@ -53,6 +55,7 @@ rules: - apiGroups: - production-stack.vllm.ai resources: + - cacheservers/status - vllmrouters/status - vllmruntimes/status verbs: diff --git a/operator/config/samples/kustomization.yaml b/operator/config/samples/kustomization.yaml index 3cd7d3881..21e1d6999 100644 --- a/operator/config/samples/kustomization.yaml +++ b/operator/config/samples/kustomization.yaml @@ -5,4 +5,5 @@ kind: Kustomization resources: - production-stack_v1alpha1_vllmruntime.yaml - production-stack_v1alpha1_vllmrouter.yaml +- production-stack_v1alpha1_cacheserver.yaml # +kubebuilder:scaffold:manifestskustomizesamples diff --git a/operator/config/samples/production-stack_v1alpha1_cacheserver.yaml b/operator/config/samples/production-stack_v1alpha1_cacheserver.yaml new file mode 100644 index 000000000..2304c8646 --- /dev/null +++ b/operator/config/samples/production-stack_v1alpha1_cacheserver.yaml @@ -0,0 +1,28 @@ +apiVersion: production-stack.vllm.ai/v1alpha1 +kind: CacheServer +metadata: + labels: + app.kubernetes.io/name: production-stack + app.kubernetes.io/managed-by: kustomize + name: cacheserver-sample +spec: + # Image configuration + image: + registry: "docker.io" + name: "lmcache/vllm-openai:2025-04-18" + pullPolicy: "IfNotPresent" + pullSecretName: "" + + # Container port + port: 8000 + + # Resource requirements + resources: + cpu: 
"2" + memory: "16Gi" + + # Number of replicas + replicas: 1 + + # Deployment strategy + deploymentStrategy: "Recreate" diff --git a/operator/config/samples/production-stack_v1alpha_vllmruntime.yaml b/operator/config/samples/production-stack_v1alpha_vllmruntime.yaml index 0e13adbdc..7b8947a4b 100644 --- a/operator/config/samples/production-stack_v1alpha_vllmruntime.yaml +++ b/operator/config/samples/production-stack_v1alpha_vllmruntime.yaml @@ -21,8 +21,8 @@ spec: enabled: true cpuOffloadingBufferSize: "15" diskOffloadingBufferSize: "8" - remoteUrl: "" - remoteSerde: "" + # remoteUrl: "lm://cacheserver-sample.default.svc.cluster.local:80" + # remoteSerde: "naive" # Model configuration model: diff --git a/operator/go.mod b/operator/go.mod index 34589b9a0..b158ae91f 100644 --- a/operator/go.mod +++ b/operator/go.mod @@ -7,10 +7,10 @@ toolchain go1.24.2 require ( github.com/onsi/ginkgo/v2 v2.23.4 github.com/onsi/gomega v1.37.0 + k8s.io/api v0.33.0 k8s.io/apimachinery v0.33.0 k8s.io/client-go v0.33.0 sigs.k8s.io/controller-runtime v0.20.4 - k8s.io/api v0.33.0 ) require ( diff --git a/operator/internal/controller/cacheserver_controller.go b/operator/internal/controller/cacheserver_controller.go new file mode 100644 index 000000000..1a534dd27 --- /dev/null +++ b/operator/internal/controller/cacheserver_controller.go @@ -0,0 +1,289 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package controller + +import ( + "context" + "fmt" + "reflect" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/client-go/util/retry" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" + + productionstackv1alpha1 "production-stack/api/v1alpha1" +) + +// CacheServerReconciler reconciles a CacheServer object +type CacheServerReconciler struct { + client.Client + Scheme *runtime.Scheme +} + +// +kubebuilder:rbac:groups=production-stack.vllm.ai,resources=cacheservers,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=production-stack.vllm.ai,resources=cacheservers/status,verbs=get;update;patch +// +kubebuilder:rbac:groups=production-stack.vllm.ai,resources=cacheservers/finalizers,verbs=update +// +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=core,resources=services,verbs=get;list;watch;create;update;patch;delete + +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the cluster closer to the desired state. +func (r *CacheServerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + log := log.FromContext(ctx) + + // Fetch the CacheServer instance + cacheServer := &productionstackv1alpha1.CacheServer{} + err := r.Get(ctx, req.NamespacedName, cacheServer) + if err != nil { + if errors.IsNotFound(err) { + // Request object not found, could have been deleted after reconcile request. + // Return and don't requeue + log.Info("CacheServer resource not found. 
Ignoring since object must be deleted") + return ctrl.Result{}, nil + } + // Error reading the object - requeue the request. + log.Error(err, "Failed to get CacheServer") + return ctrl.Result{}, err + } + + // Check if the service already exists, if not create a new one + foundService := &corev1.Service{} + err = r.Get(ctx, types.NamespacedName{Name: cacheServer.Name, Namespace: cacheServer.Namespace}, foundService) + if err != nil && errors.IsNotFound(err) { + // Define a new service + svc := r.serviceForCacheServer(cacheServer) + log.Info("Creating a new Service", "Service.Namespace", svc.Namespace, "Service.Name", svc.Name) + err = r.Create(ctx, svc) + if err != nil { + log.Error(err, "Failed to create new Service", "Service.Namespace", svc.Namespace, "Service.Name", svc.Name) + return ctrl.Result{}, err + } + // Service created successfully - return and requeue + return ctrl.Result{Requeue: true}, nil + } else if err != nil { + log.Error(err, "Failed to get Service") + return ctrl.Result{}, err + } + + // Check if the deployment already exists, if not create a new one + found := &appsv1.Deployment{} + err = r.Get(ctx, types.NamespacedName{Name: cacheServer.Name, Namespace: cacheServer.Namespace}, found) + if err != nil && errors.IsNotFound(err) { + // Define a new deployment + dep := r.deploymentForCacheServer(cacheServer) + log.Info("Creating a new Deployment", "Deployment.Namespace", dep.Namespace, "Deployment.Name", dep.Name) + err = r.Create(ctx, dep) + if err != nil { + log.Error(err, "Failed to create new Deployment", "Deployment.Namespace", dep.Namespace, "Deployment.Name", dep.Name) + return ctrl.Result{}, err + } + // Deployment created successfully - return and requeue + return ctrl.Result{Requeue: true}, nil + } else if err != nil { + log.Error(err, "Failed to get Deployment") + return ctrl.Result{}, err + } + + // Update the deployment if needed + if r.deploymentNeedsUpdate(found, cacheServer) { + log.Info("Updating Deployment", 
"Deployment.Namespace", found.Namespace, "Deployment.Name", found.Name) + // Create new deployment spec + newDep := r.deploymentForCacheServer(cacheServer) + + err = r.Update(ctx, newDep) + if err != nil { + log.Error(err, "Failed to update Deployment", "Deployment.Namespace", found.Namespace, "Deployment.Name", found.Name) + return ctrl.Result{}, err + } + // Deployment updated successfully - return and requeue + return ctrl.Result{Requeue: true}, nil + } + + // Update the status + if err := r.updateStatus(ctx, cacheServer, found); err != nil { + log.Error(err, "Failed to update CacheServer status") + return ctrl.Result{}, err + } + + return ctrl.Result{}, nil +} + +// deploymentForCacheServer returns a CacheServer Deployment object +func (r *CacheServerReconciler) deploymentForCacheServer(cacheServer *productionstackv1alpha1.CacheServer) *appsv1.Deployment { + labels := map[string]string{ + "app": cacheServer.Name, + } + + // Build resource requirements + resources := corev1.ResourceRequirements{ + Requests: corev1.ResourceList{}, + Limits: corev1.ResourceList{}, + } + + if cacheServer.Spec.Resources.CPU != "" { + resources.Requests[corev1.ResourceCPU] = resource.MustParse(cacheServer.Spec.Resources.CPU) + resources.Limits[corev1.ResourceCPU] = resource.MustParse(cacheServer.Spec.Resources.CPU) + } + + if cacheServer.Spec.Resources.Memory != "" { + resources.Requests[corev1.ResourceMemory] = resource.MustParse(cacheServer.Spec.Resources.Memory) + resources.Limits[corev1.ResourceMemory] = resource.MustParse(cacheServer.Spec.Resources.Memory) + } + + // Get the image from Image spec + image := cacheServer.Spec.Image.Registry + "/" + cacheServer.Spec.Image.Name + + // Get the image pull policy + imagePullPolicy := corev1.PullIfNotPresent + if cacheServer.Spec.Image.PullPolicy != "" { + imagePullPolicy = corev1.PullPolicy(cacheServer.Spec.Image.PullPolicy) + } + + dep := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: cacheServer.Name, + Namespace: 
cacheServer.Namespace, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: &cacheServer.Spec.Replicas, + Selector: &metav1.LabelSelector{ + MatchLabels: labels, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: labels, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "cache-server", + Image: image, + ImagePullPolicy: imagePullPolicy, + Command: []string{ + "lmcache_experimental_server", + "0.0.0.0", + fmt.Sprintf("%d", cacheServer.Spec.Port)}, + Ports: []corev1.ContainerPort{ + { + Name: "http", + ContainerPort: cacheServer.Spec.Port, + }, + }, + Resources: resources, + }, + }, + }, + }, + }, + } + + // Set the owner reference + ctrl.SetControllerReference(cacheServer, dep, r.Scheme) + return dep +} + +// deploymentNeedsUpdate checks if the deployment needs to be updated +func (r *CacheServerReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, cs *productionstackv1alpha1.CacheServer) bool { + // Compare replicas + if *dep.Spec.Replicas != cs.Spec.Replicas { + return true + } + + // Compare resources + expectedResources := r.deploymentForCacheServer(cs).Spec.Template.Spec.Containers[0].Resources + actualResources := dep.Spec.Template.Spec.Containers[0].Resources + if !reflect.DeepEqual(expectedResources, actualResources) { + return true + } + + return false +} + +// updateStatus updates the status of the CacheServer +func (r *CacheServerReconciler) updateStatus(ctx context.Context, cs *productionstackv1alpha1.CacheServer, dep *appsv1.Deployment) error { + return retry.OnError(retry.DefaultRetry, func(err error) bool { + return errors.IsConflict(err) + }, func() error { + // Get the latest version of the CacheServer + latestCS := &productionstackv1alpha1.CacheServer{} + if err := r.Get(ctx, types.NamespacedName{Name: cs.Name, Namespace: cs.Namespace}, latestCS); err != nil { + return err + } + + // Update the status fields + latestCS.Status.LastUpdated = metav1.Now() + + // Update status based on deployment 
status + if dep.Status.AvailableReplicas > 0 { + latestCS.Status.Status = "Ready" + } else if dep.Status.UpdatedReplicas > 0 { + latestCS.Status.Status = "Updating" + } else { + latestCS.Status.Status = "NotReady" + } + + return r.Status().Update(ctx, latestCS) + }) +} + +// serviceForCacheServer returns a CacheServer Service object +func (r *CacheServerReconciler) serviceForCacheServer(cacheServer *productionstackv1alpha1.CacheServer) *corev1.Service { + labels := map[string]string{ + "app": cacheServer.Name, + } + + svc := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: cacheServer.Name, + Namespace: cacheServer.Namespace, + }, + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeClusterIP, + Selector: labels, + Ports: []corev1.ServicePort{ + { + Name: "http", + Port: 80, + TargetPort: intstr.FromInt(int(cacheServer.Spec.Port)), + Protocol: corev1.ProtocolTCP, + }, + }, + }, + } + + // Set the owner reference + ctrl.SetControllerReference(cacheServer, svc, r.Scheme) + return svc +} + +// SetupWithManager sets up the controller with the Manager. +func (r *CacheServerReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + For(&productionstackv1alpha1.CacheServer{}). + Owns(&appsv1.Deployment{}). + Owns(&corev1.Service{}). + Complete(r) +} diff --git a/operator/internal/controller/cacheserver_controller_test.go b/operator/internal/controller/cacheserver_controller_test.go new file mode 100644 index 000000000..681a04d0d --- /dev/null +++ b/operator/internal/controller/cacheserver_controller_test.go @@ -0,0 +1,84 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "context" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + productionstackv1alpha1 "production-stack/api/v1alpha1" +) + +var _ = Describe("CacheServer Controller", func() { + Context("When reconciling a resource", func() { + const resourceName = "test-resource" + + ctx := context.Background() + + typeNamespacedName := types.NamespacedName{ + Name: resourceName, + Namespace: "default", // TODO(user):Modify as needed + } + cacheserver := &productionstackv1alpha1.CacheServer{} + + BeforeEach(func() { + By("creating the custom resource for the Kind CacheServer") + err := k8sClient.Get(ctx, typeNamespacedName, cacheserver) + if err != nil && errors.IsNotFound(err) { + resource := &productionstackv1alpha1.CacheServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: resourceName, + Namespace: "default", + }, + // TODO(user): Specify other spec details if needed. + } + Expect(k8sClient.Create(ctx, resource)).To(Succeed()) + } + }) + + AfterEach(func() { + // TODO(user): Cleanup logic after each test, like removing the resource instance. 
+ resource := &productionstackv1alpha1.CacheServer{} + err := k8sClient.Get(ctx, typeNamespacedName, resource) + Expect(err).NotTo(HaveOccurred()) + + By("Cleanup the specific resource instance CacheServer") + Expect(k8sClient.Delete(ctx, resource)).To(Succeed()) + }) + It("should successfully reconcile the resource", func() { + By("Reconciling the created resource") + controllerReconciler := &CacheServerReconciler{ + Client: k8sClient, + Scheme: k8sClient.Scheme(), + } + + _, err := controllerReconciler.Reconcile(ctx, reconcile.Request{ + NamespacedName: typeNamespacedName, + }) + Expect(err).NotTo(HaveOccurred()) + // TODO(user): Add more specific assertions depending on your controller's reconciliation logic. + // Example: If you expect a certain status condition after reconciliation, verify it here. + }) + }) +}) From 397deac7409cf1b55f8b3dfe355f54e077222dd7 Mon Sep 17 00:00:00 2001 From: royyhuang Date: Fri, 9 May 2025 07:14:45 +0000 Subject: [PATCH 06/14] fix service discovery issue by adding readiness probe to vllm pod Signed-off-by: royyhuang --- operator/api/v1alpha1/vllmrouter_types.go | 4 +++ .../production-stack.vllm.ai_vllmrouters.yaml | 4 +++ .../production-stack_v1alpha_vllmrouter.yaml | 3 ++ .../production-stack_v1alpha_vllmruntime.yaml | 4 +-- .../controller/cacheserver_controller.go | 4 +-- .../controller/vllmrouter_controller.go | 5 ++- .../controller/vllmruntime_controller.go | 32 +++++++++++++++++-- 7 files changed, 45 insertions(+), 11 deletions(-) diff --git a/operator/api/v1alpha1/vllmrouter_types.go b/operator/api/v1alpha1/vllmrouter_types.go index 446ee10b6..c0a81ff71 100644 --- a/operator/api/v1alpha1/vllmrouter_types.go +++ b/operator/api/v1alpha1/vllmrouter_types.go @@ -39,6 +39,10 @@ type VLLMRouterSpec struct { // +kubebuilder:default=k8s ServiceDiscovery string `json:"serviceDiscovery,omitempty"` + // K8sLabelSelector specifies the label selector for vLLM runtime pods when using k8s service discovery + //
+kubebuilder:validation:RequiredWhen=ServiceDiscovery=k8s + K8sLabelSelector string `json:"k8sLabelSelector,omitempty"` + // StaticBackends is required when using static service discovery // +kubebuilder:validation:RequiredWhen=ServiceDiscovery=static StaticBackends string `json:"staticBackends,omitempty"` diff --git a/operator/config/crd/bases/production-stack.vllm.ai_vllmrouters.yaml b/operator/config/crd/bases/production-stack.vllm.ai_vllmrouters.yaml index 8668380c3..c6de29def 100644 --- a/operator/config/crd/bases/production-stack.vllm.ai_vllmrouters.yaml +++ b/operator/config/crd/bases/production-stack.vllm.ai_vllmrouters.yaml @@ -81,6 +81,10 @@ spec: - name - registry type: object + k8sLabelSelector: + description: K8sLabelSelector specifies the label selector for vLLM + runtime pods when using k8s service discovery + type: string nodeSelectorTerms: description: NodeSelectorTerms for pod scheduling items: diff --git a/operator/config/samples/production-stack_v1alpha_vllmrouter.yaml b/operator/config/samples/production-stack_v1alpha_vllmrouter.yaml index c774472dd..80995f0c9 100644 --- a/operator/config/samples/production-stack_v1alpha_vllmrouter.yaml +++ b/operator/config/samples/production-stack_v1alpha_vllmrouter.yaml @@ -15,6 +15,9 @@ spec: # Service discovery method (k8s or static) serviceDiscovery: k8s + # Label selector for vLLM runtime pods + k8sLabelSelector: "app=vllmruntime-sample" + # Routing strategy (roundrobin or session) routingLogic: roundrobin diff --git a/operator/config/samples/production-stack_v1alpha_vllmruntime.yaml b/operator/config/samples/production-stack_v1alpha_vllmruntime.yaml index 7b8947a4b..a58a6c0ee 100644 --- a/operator/config/samples/production-stack_v1alpha_vllmruntime.yaml +++ b/operator/config/samples/production-stack_v1alpha_vllmruntime.yaml @@ -21,8 +21,8 @@ spec: enabled: true cpuOffloadingBufferSize: "15" diskOffloadingBufferSize: "8" - # remoteUrl: "lm://cacheserver-sample.default.svc.cluster.local:80" - # 
remoteSerde: "naive" + remoteUrl: "lm://cacheserver-sample.default.svc.cluster.local:80" + remoteSerde: "naive" # Model configuration model: diff --git a/operator/internal/controller/cacheserver_controller.go b/operator/internal/controller/cacheserver_controller.go index 1a534dd27..2f56ddf8f 100644 --- a/operator/internal/controller/cacheserver_controller.go +++ b/operator/internal/controller/cacheserver_controller.go @@ -224,9 +224,7 @@ func (r *CacheServerReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, cs // updateStatus updates the status of the CacheServer func (r *CacheServerReconciler) updateStatus(ctx context.Context, cs *productionstackv1alpha1.CacheServer, dep *appsv1.Deployment) error { - return retry.OnError(retry.DefaultRetry, func(err error) bool { - return errors.IsConflict(err) - }, func() error { + return retry.RetryOnConflict(retry.DefaultRetry, func() error { // Get the latest version of the CacheServer latestCS := &productionstackv1alpha1.CacheServer{} if err := r.Get(ctx, types.NamespacedName{Name: cs.Name, Namespace: cs.Namespace}, latestCS); err != nil { diff --git a/operator/internal/controller/vllmrouter_controller.go b/operator/internal/controller/vllmrouter_controller.go index dc99ee433..00f90e1b9 100644 --- a/operator/internal/controller/vllmrouter_controller.go +++ b/operator/internal/controller/vllmrouter_controller.go @@ -208,6 +208,7 @@ func (r *VLLMRouterReconciler) deploymentForVLLMRouter(router *servingv1alpha1.V if router.Spec.ServiceDiscovery == "k8s" { args = append(args, "--k8s-namespace", router.Namespace, + "--k8s-label-selector", router.Spec.K8sLabelSelector, ) } else if router.Spec.ServiceDiscovery == "static" { if router.Spec.StaticBackends == "" || router.Spec.StaticModels == "" { @@ -324,9 +325,7 @@ func (r *VLLMRouterReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, rou // updateStatus updates the status of the VLLMRouter func (r *VLLMRouterReconciler) updateStatus(ctx context.Context, router 
*servingv1alpha1.VLLMRouter, dep *appsv1.Deployment) error { - return retry.OnError(retry.DefaultRetry, func(err error) bool { - return errors.IsConflict(err) - }, func() error { + return retry.RetryOnConflict(retry.DefaultRetry, func() error { // Get the latest version of the VLLMRouter latestRouter := &servingv1alpha1.VLLMRouter{} if err := r.Get(ctx, types.NamespacedName{Name: router.Name, Namespace: router.Namespace}, latestRouter); err != nil { diff --git a/operator/internal/controller/vllmruntime_controller.go b/operator/internal/controller/vllmruntime_controller.go index a4937fc59..8725b9c15 100644 --- a/operator/internal/controller/vllmruntime_controller.go +++ b/operator/internal/controller/vllmruntime_controller.go @@ -392,6 +392,34 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production }, }, Resources: resources, + ReadinessProbe: &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/health", + Port: intstr.FromInt(int(vllmRuntime.Spec.Port)), + Scheme: corev1.URISchemeHTTP, + }, + }, + InitialDelaySeconds: 10, + PeriodSeconds: 5, + TimeoutSeconds: 3, + SuccessThreshold: 1, + FailureThreshold: 3, + }, + LivenessProbe: &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/health", + Port: intstr.FromInt(int(vllmRuntime.Spec.Port)), + Scheme: corev1.URISchemeHTTP, + }, + }, + InitialDelaySeconds: 30, + PeriodSeconds: 10, + TimeoutSeconds: 3, + SuccessThreshold: 1, + FailureThreshold: 3, + }, }, }, }, @@ -479,9 +507,7 @@ func (r *VLLMRuntimeReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, vr // updateStatus updates the status of the VLLMRuntime func (r *VLLMRuntimeReconciler) updateStatus(ctx context.Context, vr *productionstackv1alpha1.VLLMRuntime, dep *appsv1.Deployment) error { - return retry.OnError(retry.DefaultRetry, func(err error) bool { - return errors.IsConflict(err) - }, func() error { + return 
retry.RetryOnConflict(retry.DefaultRetry, func() error { // Get the latest version of the VLLMRuntime latestVR := &productionstackv1alpha1.VLLMRuntime{} if err := r.Get(ctx, types.NamespacedName{Name: vr.Name, Namespace: vr.Namespace}, latestVR); err != nil { From 56ce7f9a35c8b988e9dc8bf4e674511033b8f385 Mon Sep 17 00:00:00 2001 From: royyhuang Date: Tue, 13 May 2025 21:08:45 +0000 Subject: [PATCH 07/14] fix readiness probe Signed-off-by: royyhuang --- operator/config/manager/kustomization.yaml | 2 +- operator/config/rbac/kustomization.yaml | 10 +++++++--- ....yaml => production-stack_v1alpha1_vllmrouter.yaml} | 0 ...yaml => production-stack_v1alpha1_vllmruntime.yaml} | 2 +- operator/internal/controller/vllmruntime_controller.go | 10 +++++----- 5 files changed, 14 insertions(+), 10 deletions(-) rename operator/config/samples/{production-stack_v1alpha_vllmrouter.yaml => production-stack_v1alpha1_vllmrouter.yaml} (100%) rename operator/config/samples/{production-stack_v1alpha_vllmruntime.yaml => production-stack_v1alpha1_vllmruntime.yaml} (97%) diff --git a/operator/config/manager/kustomization.yaml b/operator/config/manager/kustomization.yaml index f3af0a933..cc9f03e24 100644 --- a/operator/config/manager/kustomization.yaml +++ b/operator/config/manager/kustomization.yaml @@ -5,5 +5,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization images: - name: controller - newName: controller + newName: 1nfinity/production-stack-controller newTag: latest diff --git a/operator/config/rbac/kustomization.yaml b/operator/config/rbac/kustomization.yaml index e47dad1b5..57100cf7c 100644 --- a/operator/config/rbac/kustomization.yaml +++ b/operator/config/rbac/kustomization.yaml @@ -25,9 +25,13 @@ resources: - cacheserver_admin_role.yaml - cacheserver_editor_role.yaml - cacheserver_viewer_role.yaml -- router_admin_role.yaml -- router_editor_role.yaml -- router_viewer_role.yaml +- vllmrouter_admin_role.yaml +- vllmrouter_editor_role.yaml +- vllmrouter_viewer_role.yaml 
- vllmruntime_admin_role.yaml - vllmruntime_editor_role.yaml - vllmruntime_viewer_role.yaml +# Pod viewer role is used to view pods for vllmrouter's service discovery +- pod_viewer_role.yaml +- vllmrouter_service_account.yaml +- vllmrouter_role_binding.yaml diff --git a/operator/config/samples/production-stack_v1alpha_vllmrouter.yaml b/operator/config/samples/production-stack_v1alpha1_vllmrouter.yaml similarity index 100% rename from operator/config/samples/production-stack_v1alpha_vllmrouter.yaml rename to operator/config/samples/production-stack_v1alpha1_vllmrouter.yaml diff --git a/operator/config/samples/production-stack_v1alpha_vllmruntime.yaml b/operator/config/samples/production-stack_v1alpha1_vllmruntime.yaml similarity index 97% rename from operator/config/samples/production-stack_v1alpha_vllmruntime.yaml rename to operator/config/samples/production-stack_v1alpha1_vllmruntime.yaml index a58a6c0ee..051f7b883 100644 --- a/operator/config/samples/production-stack_v1alpha_vllmruntime.yaml +++ b/operator/config/samples/production-stack_v1alpha1_vllmruntime.yaml @@ -20,7 +20,7 @@ spec: lmCacheConfig: enabled: true cpuOffloadingBufferSize: "15" - diskOffloadingBufferSize: "8" + diskOffloadingBufferSize: "0" remoteUrl: "lm://cacheserver-sample.default.svc.cluster.local:80" remoteSerde: "naive" diff --git a/operator/internal/controller/vllmruntime_controller.go b/operator/internal/controller/vllmruntime_controller.go index 8725b9c15..394d38bb1 100644 --- a/operator/internal/controller/vllmruntime_controller.go +++ b/operator/internal/controller/vllmruntime_controller.go @@ -400,11 +400,11 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production Scheme: corev1.URISchemeHTTP, }, }, - InitialDelaySeconds: 10, - PeriodSeconds: 5, - TimeoutSeconds: 3, + InitialDelaySeconds: 30, + PeriodSeconds: 20, + TimeoutSeconds: 5, SuccessThreshold: 1, - FailureThreshold: 3, + FailureThreshold: 10, }, LivenessProbe: &corev1.Probe{ ProbeHandler: 
corev1.ProbeHandler{ @@ -414,7 +414,7 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production Scheme: corev1.URISchemeHTTP, }, }, - InitialDelaySeconds: 30, + InitialDelaySeconds: 240, PeriodSeconds: 10, TimeoutSeconds: 3, SuccessThreshold: 1, From a28c5f1d17bee3b34ee148623372f5e3a2cf0d72 Mon Sep 17 00:00:00 2001 From: royyhuang Date: Mon, 2 Jun 2025 23:48:15 +0000 Subject: [PATCH 08/14] restructure runtime crd spec Signed-off-by: royyhuang --- .pre-commit-config.yaml | 1 + operator/api/v1alpha1/vllmruntime_types.go | 58 ++++-- .../api/v1alpha1/zz_generated.deepcopy.go | 58 ++++-- ...production-stack.vllm.ai_vllmruntimes.yaml | 197 +++++++++--------- operator/config/rbac/pod_viewer_role.yaml | 17 -- .../config/rbac/vllmrouter_role_binding.yaml | 33 ++- .../rbac/vllmrouter_service_account.yaml | 8 - ...production-stack_v1alpha1_vllmruntime.yaml | 82 ++++---- .../controller/vllmruntime_controller.go | 153 +++++++------- 9 files changed, 340 insertions(+), 267 deletions(-) delete mode 100644 operator/config/rbac/pod_viewer_role.yaml delete mode 100644 operator/config/rbac/vllmrouter_service_account.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index dd8a473ca..207012987 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,6 +9,7 @@ repos: - id: check-json - id: check-toml - id: check-yaml + args: ["--allow-multiple-documents"] exclude: | (?x)( ^helm/templates/| diff --git a/operator/api/v1alpha1/vllmruntime_types.go b/operator/api/v1alpha1/vllmruntime_types.go index 1ce19c2b7..6ac49e1fe 100644 --- a/operator/api/v1alpha1/vllmruntime_types.go +++ b/operator/api/v1alpha1/vllmruntime_types.go @@ -24,11 +24,41 @@ import ( // EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! // NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. 
+// DeploymentConfig defines the deployment configuration +type DeploymentConfig struct { + // Replicas + // +kubebuilder:default=1 + Replicas int32 `json:"replicas,omitempty"` + + // Deploy strategy + // +kubebuilder:validation:Enum=RollingUpdate;Recreate + // +kubebuilder:default=RollingUpdate + DeployStrategy string `json:"deploymentStrategy,omitempty"` + + // Resource requirements + Resources ResourceRequirements `json:"resources"` + + // Image configuration + Image ImageSpec `json:"image"` +} + // VLLMRuntimeSpec defines the desired state of VLLMRuntime type VLLMRuntimeSpec struct { // Model configuration Model ModelSpec `json:"model"` + // vLLM server configuration + VLLMConfig VLLMConfig `json:"vllmConfig"` + + // LM Cache configuration + LMCacheConfig LMCacheConfig `json:"lmCacheConfig,omitempty"` + + // Deployment configuration + DeploymentConfig DeploymentConfig `json:"deploymentConfig"` +} + +// VLLMConfig defines the vLLM server configuration +type VLLMConfig struct { // Enable chunked prefill EnableChunkedPrefill bool `json:"enableChunkedPrefill,omitempty"` @@ -44,9 +74,6 @@ type VLLMRuntimeSpec struct { // Maximum number of LoRAs MaxLoras int32 `json:"maxLoras,omitempty"` - // LM Cache configuration - LMCacheConfig LMCacheConfig `json:"lmCacheConfig,omitempty"` - // Extra arguments for vllm serve ExtraArgs []string `json:"extraArgs,omitempty"` @@ -59,12 +86,12 @@ type VLLMRuntimeSpec struct { // Environment variables Env []EnvVar `json:"env,omitempty"` +} - // Resource requirements - Resources ResourceRequirements `json:"resources"` - - // Image configuration - Image ImageSpec `json:"image"` +// ModelSpec defines the model configuration +type ModelSpec struct { + // Model URL + ModelURL string `json:"modelURL"` // HuggingFace token secret HFTokenSecret corev1.LocalObjectReference `json:"hfTokenSecret,omitempty"` @@ -72,21 +99,6 @@ type VLLMRuntimeSpec struct { // +kubebuilder:validation:RequiredWhen=HFTokenSecret.Name!="" HFTokenName string 
`json:"hfTokenName,omitempty"` - // Replicas - // +kubebuilder:default=1 - Replicas int32 `json:"replicas,omitempty"` - - // Deploy strategy - // +kubebuilder:validation:Enum=RollingUpdate;Recreate - // +kubebuilder:default=RollingUpdate - DeployStrategy string `json:"deploymentStrategy,omitempty"` -} - -// ModelSpec defines the model configuration -type ModelSpec struct { - // Model URL - ModelURL string `json:"modelURL"` - // Enable LoRA EnableLoRA bool `json:"enableLoRA,omitempty"` diff --git a/operator/api/v1alpha1/zz_generated.deepcopy.go b/operator/api/v1alpha1/zz_generated.deepcopy.go index 21bbc16e8..cfacfb173 100644 --- a/operator/api/v1alpha1/zz_generated.deepcopy.go +++ b/operator/api/v1alpha1/zz_generated.deepcopy.go @@ -117,6 +117,23 @@ func (in *CacheServerStatus) DeepCopy() *CacheServerStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DeploymentConfig) DeepCopyInto(out *DeploymentConfig) { + *out = *in + out.Resources = in.Resources + out.Image = in.Image +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DeploymentConfig. +func (in *DeploymentConfig) DeepCopy() *DeploymentConfig { + if in == nil { + return nil + } + out := new(DeploymentConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *EnvVar) DeepCopyInto(out *EnvVar) { *out = *in @@ -165,6 +182,7 @@ func (in *LMCacheConfig) DeepCopy() *LMCacheConfig { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ModelSpec) DeepCopyInto(out *ModelSpec) { *out = *in + out.HFTokenSecret = in.HFTokenSecret } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelSpec. 
@@ -192,6 +210,31 @@ func (in *ResourceRequirements) DeepCopy() *ResourceRequirements { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *VLLMConfig) DeepCopyInto(out *VLLMConfig) { + *out = *in + if in.ExtraArgs != nil { + in, out := &in.ExtraArgs, &out.ExtraArgs + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.Env != nil { + in, out := &in.Env, &out.Env + *out = make([]EnvVar, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VLLMConfig. +func (in *VLLMConfig) DeepCopy() *VLLMConfig { + if in == nil { + return nil + } + out := new(VLLMConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *VLLMRouter) DeepCopyInto(out *VLLMRouter) { *out = *in @@ -365,20 +408,9 @@ func (in *VLLMRuntimeList) DeepCopyObject() runtime.Object { func (in *VLLMRuntimeSpec) DeepCopyInto(out *VLLMRuntimeSpec) { *out = *in out.Model = in.Model + in.VLLMConfig.DeepCopyInto(&out.VLLMConfig) out.LMCacheConfig = in.LMCacheConfig - if in.ExtraArgs != nil { - in, out := &in.ExtraArgs, &out.ExtraArgs - *out = make([]string, len(*in)) - copy(*out, *in) - } - if in.Env != nil { - in, out := &in.Env, &out.Env - *out = make([]EnvVar, len(*in)) - copy(*out, *in) - } - out.Resources = in.Resources - out.Image = in.Image - out.HFTokenSecret = in.HFTokenSecret + out.DeploymentConfig = in.DeploymentConfig } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VLLMRuntimeSpec. 
diff --git a/operator/config/crd/bases/production-stack.vllm.ai_vllmruntimes.yaml b/operator/config/crd/bases/production-stack.vllm.ai_vllmruntimes.yaml index c3f21d2ac..44cc896cd 100644 --- a/operator/config/crd/bases/production-stack.vllm.ai_vllmruntimes.yaml +++ b/operator/config/crd/bases/production-stack.vllm.ai_vllmruntimes.yaml @@ -41,72 +41,49 @@ spec: spec: description: VLLMRuntimeSpec defines the desired state of VLLMRuntime properties: - deploymentStrategy: - default: RollingUpdate - description: Deploy strategy - enum: - - RollingUpdate - - Recreate - type: string - enableChunkedPrefill: - description: Enable chunked prefill - type: boolean - enablePrefixCaching: - description: Enable prefix caching - type: boolean - env: - description: Environment variables - items: - description: EnvVar represents an environment variable - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - extraArgs: - description: Extra arguments for vllm serve - items: - type: string - type: array - gpuMemoryUtilization: - description: GPU memory utilization - type: string - hfTokenName: - default: token - type: string - hfTokenSecret: - description: HuggingFace token secret + deploymentConfig: + description: Deployment configuration properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. 
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - image: - description: Image configuration - properties: - name: - type: string - pullPolicy: - type: string - pullSecretName: - type: string - registry: + deploymentStrategy: + default: RollingUpdate + description: Deploy strategy + enum: + - RollingUpdate + - Recreate type: string + image: + description: Image configuration + properties: + name: + type: string + pullPolicy: + type: string + pullSecretName: + type: string + registry: + type: string + required: + - name + - registry + type: object + replicas: + default: 1 + description: Replicas + format: int32 + type: integer + resources: + description: Resource requirements + properties: + cpu: + type: string + gpu: + type: string + memory: + type: string + type: object required: - - name - - registry + - image + - resources type: object lmCacheConfig: description: LM Cache configuration @@ -133,10 +110,6 @@ spec: description: RemoteURL is the URL of the remote cache server type: string type: object - maxLoras: - description: Maximum number of LoRAs - format: int32 - type: integer model: description: Model configuration properties: @@ -149,6 +122,23 @@ spec: enableTool: description: Enable tool type: boolean + hfTokenName: + default: token + type: string + hfTokenSecret: + description: HuggingFace token secret + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic maxModelLen: description: Maximum model length format: int32 @@ -166,37 +156,58 @@ spec: required: - modelURL type: object - port: - default: 8000 - description: Port for vLLM server - format: int32 - type: integer - replicas: - default: 1 - description: Replicas - format: int32 - type: integer - resources: - description: Resource requirements + vllmConfig: + description: vLLM server configuration properties: - cpu: - type: string - gpu: - type: string - memory: + enableChunkedPrefill: + description: Enable chunked prefill + type: boolean + enablePrefixCaching: + description: Enable prefix caching + type: boolean + env: + description: Environment variables + items: + description: EnvVar represents an environment variable + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + extraArgs: + description: Extra arguments for vllm serve + items: + type: string + type: array + gpuMemoryUtilization: + description: GPU memory utilization type: string + maxLoras: + description: Maximum number of LoRAs + format: int32 + type: integer + port: + default: 8000 + description: Port for vLLM server + format: int32 + type: integer + tensorParallelSize: + description: Tensor parallel size + format: int32 + type: integer + v1: + description: Use V1 API + type: boolean type: object - tensorParallelSize: - description: Tensor parallel size - format: int32 - type: integer - v1: - description: Use V1 API - type: boolean required: - - image + - deploymentConfig - model - - resources + - vllmConfig type: object status: description: VLLMRuntimeStatus defines the observed state of VLLMRuntime diff --git a/operator/config/rbac/pod_viewer_role.yaml b/operator/config/rbac/pod_viewer_role.yaml deleted file mode 100644 index b94a22369..000000000 --- 
a/operator/config/rbac/pod_viewer_role.yaml +++ /dev/null @@ -1,17 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: pod-viewer-role - namespace: default - labels: - app.kubernetes.io/name: production-stack - app.kubernetes.io/managed-by: kustomize -rules: -- apiGroups: - - "" - resources: - - pods - verbs: - - get - - list - - watch diff --git a/operator/config/rbac/vllmrouter_role_binding.yaml b/operator/config/rbac/vllmrouter_role_binding.yaml index a29c577c8..2807c765e 100644 --- a/operator/config/rbac/vllmrouter_role_binding.yaml +++ b/operator/config/rbac/vllmrouter_role_binding.yaml @@ -1,4 +1,31 @@ apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: pod-viewer-role + namespace: default + labels: + app.kubernetes.io/name: production-stack + app.kubernetes.io/managed-by: kustomize +rules: + - apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: vllmrouter-sa + namespace: default + labels: + app.kubernetes.io/name: production-stack + app.kubernetes.io/managed-by: kustomize +--- +apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: name: pod-viewer-binding @@ -7,9 +34,9 @@ metadata: app.kubernetes.io/name: production-stack app.kubernetes.io/managed-by: kustomize subjects: -- kind: ServiceAccount - name: vllmrouter-sa - namespace: default + - kind: ServiceAccount + name: vllmrouter-sa + namespace: default roleRef: kind: Role name: pod-viewer-role diff --git a/operator/config/rbac/vllmrouter_service_account.yaml b/operator/config/rbac/vllmrouter_service_account.yaml deleted file mode 100644 index 4bb14d72b..000000000 --- a/operator/config/rbac/vllmrouter_service_account.yaml +++ /dev/null @@ -1,8 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - name: vllmrouter-sa - namespace: default - labels: - app.kubernetes.io/name: production-stack - app.kubernetes.io/managed-by: kustomize diff --git 
a/operator/config/samples/production-stack_v1alpha1_vllmruntime.yaml b/operator/config/samples/production-stack_v1alpha1_vllmruntime.yaml index 051f7b883..2d2ae3c2a 100644 --- a/operator/config/samples/production-stack_v1alpha1_vllmruntime.yaml +++ b/operator/config/samples/production-stack_v1alpha1_vllmruntime.yaml @@ -7,23 +7,6 @@ metadata: name: vllmruntime-sample spec: - # vLLM specific configurations - enableChunkedPrefill: false - enablePrefixCaching: false - tensorParallelSize: 1 - gpuMemoryUtilization: "0.8" - maxLoras: 4 - extraArgs: ["--disable-log-requests"] - v1: true - - # LM Cache configuration - lmCacheConfig: - enabled: true - cpuOffloadingBufferSize: "15" - diskOffloadingBufferSize: "0" - remoteUrl: "lm://cacheserver-sample.default.svc.cluster.local:80" - remoteSerde: "naive" - # Model configuration model: modelURL: "meta-llama/Llama-3.1-8B" @@ -33,31 +16,52 @@ spec: maxModelLen: 4096 dtype: "bfloat16" maxNumSeqs: 32 + # HuggingFace token secret (optional) + hfTokenSecret: + name: "huggingface-token" + hfTokenName: "token" - # Environment variables - env: - - name: HF_HOME - value: "/data" + # vLLM server configuration + vllmConfig: + # vLLM specific configurations + enableChunkedPrefill: false + enablePrefixCaching: false + tensorParallelSize: 1 + gpuMemoryUtilization: "0.8" + maxLoras: 4 + extraArgs: ["--disable-log-requests"] + v1: true + port: 8000 + # Environment variables + env: + - name: HF_HOME + value: "/data" - # Resource requirements - resources: - cpu: "10" - memory: "32Gi" - gpu: "1" + # LM Cache configuration + lmCacheConfig: + enabled: true + cpuOffloadingBufferSize: "15" + diskOffloadingBufferSize: "0" + remoteUrl: "lm://cacheserver-sample.default.svc.cluster.local:80" + remoteSerde: "naive" - # Image configuration - image: - registry: "docker.io" - name: "lmcache/vllm-openai:2025-04-18" - pullPolicy: "IfNotPresent" - pullSecretName: "" + # Deployment configuration + deploymentConfig: + # Resource requirements + resources: + cpu: 
"10" + memory: "32Gi" + gpu: "1" - # HuggingFace token secret (optional) - hfTokenSecret: - name: "huggingface-token" + # Image configuration + image: + registry: "docker.io" + name: "lmcache/vllm-openai:2025-04-18" + pullPolicy: "IfNotPresent" + pullSecretName: "" - # Number of replicas - replicas: 1 + # Number of replicas + replicas: 1 - # Deployment strategy - deploymentStrategy: "Recreate" + # Deployment strategy + deploymentStrategy: "Recreate" diff --git a/operator/internal/controller/vllmruntime_controller.go b/operator/internal/controller/vllmruntime_controller.go index 394d38bb1..41d46ec38 100644 --- a/operator/internal/controller/vllmruntime_controller.go +++ b/operator/internal/controller/vllmruntime_controller.go @@ -125,7 +125,7 @@ func (r *VLLMRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Request) } // Update the deployment if needed - if r.deploymentNeedsUpdate(found, vllmRuntime) { + if r.deploymentNeedsUpdate(ctx, found, vllmRuntime) { log.Info("Updating Deployment", "Deployment.Namespace", found.Namespace, "Deployment.Name", found.Name) // Create new deployment spec newDep := r.deploymentForVLLMRuntime(vllmRuntime) @@ -154,14 +154,44 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production "app": vllmRuntime.Name, } + // Define probes + readinessProbe := &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/health", + Port: intstr.FromInt(int(vllmRuntime.Spec.VLLMConfig.Port)), + Scheme: corev1.URISchemeHTTP, + }, + }, + InitialDelaySeconds: 30, + PeriodSeconds: 20, + TimeoutSeconds: 5, + SuccessThreshold: 1, + FailureThreshold: 10, + } + + livenessProbe := &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/health", + Port: intstr.FromInt(int(vllmRuntime.Spec.VLLMConfig.Port)), + Scheme: corev1.URISchemeHTTP, + }, + }, + InitialDelaySeconds: 240, + PeriodSeconds: 10, + TimeoutSeconds: 3, + SuccessThreshold: 1, + 
FailureThreshold: 3, + } + // Build command line arguments args := []string{ - "--model", vllmRuntime.Spec.Model.ModelURL, "--host", "0.0.0.0", "--port", - fmt.Sprintf("%d", vllmRuntime.Spec.Port), + fmt.Sprintf("%d", vllmRuntime.Spec.VLLMConfig.Port), } if vllmRuntime.Spec.Model.EnableLoRA { @@ -176,13 +206,13 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production args = append(args, "--tool-call-parser", vllmRuntime.Spec.Model.ToolCallParser) } - if vllmRuntime.Spec.EnableChunkedPrefill { + if vllmRuntime.Spec.VLLMConfig.EnableChunkedPrefill { args = append(args, "--enable-chunked-prefill") } else { args = append(args, "--no-enable-chunked-prefill") } - if vllmRuntime.Spec.EnablePrefixCaching { + if vllmRuntime.Spec.VLLMConfig.EnablePrefixCaching { args = append(args, "--enable-prefix-caching") } else { args = append(args, "--no-enable-prefix-caching") @@ -196,29 +226,29 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production args = append(args, "--dtype", vllmRuntime.Spec.Model.DType) } - if vllmRuntime.Spec.TensorParallelSize > 0 { - args = append(args, "--tensor-parallel-size", fmt.Sprintf("%d", vllmRuntime.Spec.TensorParallelSize)) + if vllmRuntime.Spec.VLLMConfig.TensorParallelSize > 0 { + args = append(args, "--tensor-parallel-size", fmt.Sprintf("%d", vllmRuntime.Spec.VLLMConfig.TensorParallelSize)) } if vllmRuntime.Spec.Model.MaxNumSeqs > 0 { args = append(args, "--max-num-seqs", fmt.Sprintf("%d", vllmRuntime.Spec.Model.MaxNumSeqs)) } - if vllmRuntime.Spec.GpuMemoryUtilization != "" { - args = append(args, "--gpu_memory_utilization", vllmRuntime.Spec.GpuMemoryUtilization) + if vllmRuntime.Spec.VLLMConfig.GpuMemoryUtilization != "" { + args = append(args, "--gpu_memory_utilization", vllmRuntime.Spec.VLLMConfig.GpuMemoryUtilization) } - if vllmRuntime.Spec.MaxLoras > 0 { - args = append(args, "--max_loras", fmt.Sprintf("%d", vllmRuntime.Spec.MaxLoras)) + if vllmRuntime.Spec.VLLMConfig.MaxLoras > 0 { + 
args = append(args, "--max_loras", fmt.Sprintf("%d", vllmRuntime.Spec.VLLMConfig.MaxLoras)) } - if vllmRuntime.Spec.ExtraArgs != nil { - args = append(args, vllmRuntime.Spec.ExtraArgs...) + if vllmRuntime.Spec.VLLMConfig.ExtraArgs != nil { + args = append(args, vllmRuntime.Spec.VLLMConfig.ExtraArgs...) } // Build environment variables env := []corev1.EnvVar{} - if vllmRuntime.Spec.V1 { + if vllmRuntime.Spec.VLLMConfig.V1 { env = append(env, corev1.EnvVar{ Name: "VLLM_USE_V1", Value: "1", @@ -249,7 +279,7 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production // Add KV transfer config based on V1 flag var lmcache_config string - if vllmRuntime.Spec.V1 { + if vllmRuntime.Spec.VLLMConfig.V1 { lmcache_config = `{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_both"}` } else { lmcache_config = `{"kv_connector":"LMCacheConnector","kv_role":"kv_both"}` @@ -297,8 +327,8 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production } // Add user-defined environment variables - if vllmRuntime.Spec.Env != nil { - for _, e := range vllmRuntime.Spec.Env { + if vllmRuntime.Spec.VLLMConfig.Env != nil { + for _, e := range vllmRuntime.Spec.VLLMConfig.Env { env = append(env, corev1.EnvVar{ Name: e.Name, Value: e.Value, @@ -312,47 +342,47 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production Limits: corev1.ResourceList{}, } - if vllmRuntime.Spec.Resources.CPU != "" { - resources.Requests[corev1.ResourceCPU] = resource.MustParse(vllmRuntime.Spec.Resources.CPU) - resources.Limits[corev1.ResourceCPU] = resource.MustParse(vllmRuntime.Spec.Resources.CPU) + if vllmRuntime.Spec.DeploymentConfig.Resources.CPU != "" { + resources.Requests[corev1.ResourceCPU] = resource.MustParse(vllmRuntime.Spec.DeploymentConfig.Resources.CPU) + resources.Limits[corev1.ResourceCPU] = resource.MustParse(vllmRuntime.Spec.DeploymentConfig.Resources.CPU) } - if vllmRuntime.Spec.Resources.Memory != "" { - 
resources.Requests[corev1.ResourceMemory] = resource.MustParse(vllmRuntime.Spec.Resources.Memory) - resources.Limits[corev1.ResourceMemory] = resource.MustParse(vllmRuntime.Spec.Resources.Memory) + if vllmRuntime.Spec.DeploymentConfig.Resources.Memory != "" { + resources.Requests[corev1.ResourceMemory] = resource.MustParse(vllmRuntime.Spec.DeploymentConfig.Resources.Memory) + resources.Limits[corev1.ResourceMemory] = resource.MustParse(vllmRuntime.Spec.DeploymentConfig.Resources.Memory) } - if vllmRuntime.Spec.Resources.GPU != "" { + if vllmRuntime.Spec.DeploymentConfig.Resources.GPU != "" { // Parse GPU resource as a decimal value - gpuResource := resource.MustParse(vllmRuntime.Spec.Resources.GPU) + gpuResource := resource.MustParse(vllmRuntime.Spec.DeploymentConfig.Resources.GPU) resources.Requests["nvidia.com/gpu"] = gpuResource resources.Limits["nvidia.com/gpu"] = gpuResource } // Get the image from Image spec or use default - image := vllmRuntime.Spec.Image.Registry + "/" + vllmRuntime.Spec.Image.Name + image := vllmRuntime.Spec.DeploymentConfig.Image.Registry + "/" + vllmRuntime.Spec.DeploymentConfig.Image.Name // Get the image pull policy imagePullPolicy := corev1.PullIfNotPresent - if vllmRuntime.Spec.Image.PullPolicy != "" { - imagePullPolicy = corev1.PullPolicy(vllmRuntime.Spec.Image.PullPolicy) + if vllmRuntime.Spec.DeploymentConfig.Image.PullPolicy != "" { + imagePullPolicy = corev1.PullPolicy(vllmRuntime.Spec.DeploymentConfig.Image.PullPolicy) } // Build image pull secrets var imagePullSecrets []corev1.LocalObjectReference - if vllmRuntime.Spec.Image.PullSecretName != "" { + if vllmRuntime.Spec.DeploymentConfig.Image.PullSecretName != "" { imagePullSecrets = append(imagePullSecrets, corev1.LocalObjectReference{ - Name: vllmRuntime.Spec.Image.PullSecretName, + Name: vllmRuntime.Spec.DeploymentConfig.Image.PullSecretName, }) } - if vllmRuntime.Spec.HFTokenSecret.Name != "" { + if vllmRuntime.Spec.Model.HFTokenSecret.Name != "" { env = append(env, 
corev1.EnvVar{ Name: "HF_TOKEN", ValueFrom: &corev1.EnvVarSource{ SecretKeyRef: &corev1.SecretKeySelector{ - LocalObjectReference: vllmRuntime.Spec.HFTokenSecret, - Key: vllmRuntime.Spec.HFTokenName, + LocalObjectReference: vllmRuntime.Spec.Model.HFTokenSecret, + Key: vllmRuntime.Spec.Model.HFTokenName, }, }, }) @@ -364,9 +394,9 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production Namespace: vllmRuntime.Namespace, }, Spec: appsv1.DeploymentSpec{ - Replicas: &vllmRuntime.Spec.Replicas, + Replicas: &vllmRuntime.Spec.DeploymentConfig.Replicas, Strategy: appsv1.DeploymentStrategy{ - Type: appsv1.DeploymentStrategyType(vllmRuntime.Spec.DeployStrategy), + Type: appsv1.DeploymentStrategyType(vllmRuntime.Spec.DeploymentConfig.DeployStrategy), }, Selector: &metav1.LabelSelector{ MatchLabels: labels, @@ -382,44 +412,18 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production Name: "vllm", Image: image, ImagePullPolicy: imagePullPolicy, - Command: []string{"python3", "-m", "vllm.entrypoints.openai.api_server"}, + Command: []string{"/opt/venv/bin/vllm", "serve"}, Args: args, Env: env, Ports: []corev1.ContainerPort{ { Name: "http", - ContainerPort: vllmRuntime.Spec.Port, + ContainerPort: vllmRuntime.Spec.VLLMConfig.Port, }, }, - Resources: resources, - ReadinessProbe: &corev1.Probe{ - ProbeHandler: corev1.ProbeHandler{ - HTTPGet: &corev1.HTTPGetAction{ - Path: "/health", - Port: intstr.FromInt(int(vllmRuntime.Spec.Port)), - Scheme: corev1.URISchemeHTTP, - }, - }, - InitialDelaySeconds: 30, - PeriodSeconds: 20, - TimeoutSeconds: 5, - SuccessThreshold: 1, - FailureThreshold: 10, - }, - LivenessProbe: &corev1.Probe{ - ProbeHandler: corev1.ProbeHandler{ - HTTPGet: &corev1.HTTPGetAction{ - Path: "/health", - Port: intstr.FromInt(int(vllmRuntime.Spec.Port)), - Scheme: corev1.URISchemeHTTP, - }, - }, - InitialDelaySeconds: 240, - PeriodSeconds: 10, - TimeoutSeconds: 3, - SuccessThreshold: 1, - FailureThreshold: 3, - }, + 
Resources: resources, + ReadinessProbe: readinessProbe, + LivenessProbe: livenessProbe, }, }, }, @@ -433,7 +437,9 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production } // deploymentNeedsUpdate checks if the deployment needs to be updated -func (r *VLLMRuntimeReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, vr *productionstackv1alpha1.VLLMRuntime) bool { +func (r *VLLMRuntimeReconciler) deploymentNeedsUpdate(ctx context.Context, dep *appsv1.Deployment, vr *productionstackv1alpha1.VLLMRuntime) bool { + + log := log.FromContext(ctx) // Generate the expected deployment expectedDep := r.deploymentForVLLMRuntime(vr) @@ -442,21 +448,24 @@ func (r *VLLMRuntimeReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, vr actualModelURL := "" // For vllm serve, the model URL is the first argument after the command if len(dep.Spec.Template.Spec.Containers[0].Args) > 0 { - actualModelURL = dep.Spec.Template.Spec.Containers[0].Args[1] + actualModelURL = dep.Spec.Template.Spec.Containers[0].Args[0] } if expectedModelURL != actualModelURL { + log.Info("Model URL mismatch", "expected", expectedModelURL, "actual", actualModelURL) return true } // Compare port - expectedPort := vr.Spec.Port + expectedPort := vr.Spec.VLLMConfig.Port actualPort := dep.Spec.Template.Spec.Containers[0].Ports[0].ContainerPort if expectedPort != actualPort { + log.Info("Port mismatch", "expected", expectedPort, "actual", actualPort) return true } // Compare image if expectedDep.Spec.Template.Spec.Containers[0].Image != dep.Spec.Template.Spec.Containers[0].Image { + log.Info("Image mismatch", "expected", expectedDep.Spec.Template.Spec.Containers[0].Image, "actual", dep.Spec.Template.Spec.Containers[0].Image) return true } @@ -464,6 +473,7 @@ func (r *VLLMRuntimeReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, vr expectedResources := expectedDep.Spec.Template.Spec.Containers[0].Resources actualResources := dep.Spec.Template.Spec.Containers[0].Resources if 
!reflect.DeepEqual(expectedResources, actualResources) { + log.Info("Resources mismatch", "expected", expectedResources, "actual", actualResources) return true } @@ -499,6 +509,7 @@ func (r *VLLMRuntimeReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, vr expectedLMCacheConfig.DiskOffloadingBufferSize != actualDiskOffloadingBufferSize || expectedLMCacheConfig.RemoteURL != actualRemoteURL || expectedLMCacheConfig.RemoteSerde != actualRemoteSerde { + log.Info("LM Cache configuration mismatch", "expected", expectedLMCacheConfig, "actual", actualLMCacheConfig) return true } @@ -549,7 +560,7 @@ func (r *VLLMRuntimeReconciler) serviceForVLLMRuntime(vllmRuntime *productionsta { Name: "http", Port: 80, - TargetPort: intstr.FromInt(int(vllmRuntime.Spec.Port)), + TargetPort: intstr.FromInt(int(vllmRuntime.Spec.VLLMConfig.Port)), Protocol: corev1.ProtocolTCP, }, }, @@ -564,7 +575,7 @@ func (r *VLLMRuntimeReconciler) serviceForVLLMRuntime(vllmRuntime *productionsta // serviceNeedsUpdate checks if the service needs to be updated func (r *VLLMRuntimeReconciler) serviceNeedsUpdate(svc *corev1.Service, vr *productionstackv1alpha1.VLLMRuntime) bool { // Compare target port - expectedTargetPort := int(vr.Spec.Port) + expectedTargetPort := int(vr.Spec.VLLMConfig.Port) actualTargetPort := svc.Spec.Ports[0].TargetPort.IntValue() if expectedTargetPort != actualTargetPort { return true From 622ea3048bb5f9bcec01345af97b6601197d3a9f Mon Sep 17 00:00:00 2001 From: royyhuang Date: Fri, 6 Jun 2025 04:15:50 +0000 Subject: [PATCH 09/14] add default operator manifest Signed-off-by: royyhuang --- operator/config/manager/kustomization.yaml | 10 +- operator/config/rbac/kustomization.yaml | 2 - operator/default.yaml | 1471 ++++++++++++++++++++ 3 files changed, 1476 insertions(+), 7 deletions(-) create mode 100644 operator/default.yaml diff --git a/operator/config/manager/kustomization.yaml b/operator/config/manager/kustomization.yaml index 5e6ad7ce9..cc9f03e24 100644 --- 
a/operator/config/manager/kustomization.yaml +++ b/operator/config/manager/kustomization.yaml @@ -1,9 +1,9 @@ resources: - - namespace.yaml - - deployment.yaml +- namespace.yaml +- deployment.yaml apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization images: - - name: controller - newName: lmcache/operator - newTag: latest +- name: controller + newName: 1nfinity/production-stack-controller + newTag: latest diff --git a/operator/config/rbac/kustomization.yaml b/operator/config/rbac/kustomization.yaml index 2041c9e0f..54248b6f0 100644 --- a/operator/config/rbac/kustomization.yaml +++ b/operator/config/rbac/kustomization.yaml @@ -35,6 +35,4 @@ resources: - vllmruntime_editor_role.yaml - vllmruntime_viewer_role.yaml # Pod viewer role is used to view pods for vllmrouter's service discovery - - pod_viewer_role.yaml - - vllmrouter_service_account.yaml - vllmrouter_role_binding.yaml diff --git a/operator/default.yaml b/operator/default.yaml new file mode 100644 index 000000000..848c64670 --- /dev/null +++ b/operator/default.yaml @@ -0,0 +1,1471 @@ +apiVersion: v1 +kind: Namespace +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: production-stack + control-plane: controller-manager + name: production-stack-system +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.17.2 + name: cacheservers.production-stack.vllm.ai +spec: + group: production-stack.vllm.ai + names: + kind: CacheServer + listKind: CacheServerList + plural: cacheservers + singular: cacheserver + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .status.status + name: Status + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: CacheServer is the Schema for the cacheservers API + properties: + apiVersion: + description: |- + APIVersion defines the versioned 
schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: CacheServerSpec defines the desired state of CacheServer + properties: + deploymentStrategy: + default: RollingUpdate + description: Deployment strategy + enum: + - RollingUpdate + - Recreate + type: string + image: + description: Image configuration for the cache server + properties: + name: + type: string + pullPolicy: + type: string + pullSecretName: + type: string + registry: + type: string + required: + - name + - registry + type: object + port: + default: 8000 + description: Container port for the cache server + format: int32 + type: integer + replicas: + default: 1 + description: Number of replicas + format: int32 + type: integer + resources: + description: Resource requirements + properties: + cpu: + type: string + gpu: + type: string + memory: + type: string + type: object + required: + - deploymentStrategy + - image + - port + - replicas + - resources + type: object + status: + description: CacheServerStatus defines the observed state of CacheServer + properties: + lastUpdated: + description: Last time the status was updated + format: date-time + type: string + status: + description: Current status of the cache server + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition 
+metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.17.2 + name: loraadapters.production-stack.vllm.ai +spec: + group: production-stack.vllm.ai + names: + kind: LoraAdapter + listKind: LoraAdapterList + plural: loraadapters + singular: loraadapter + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .status.phase + name: Phase + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: LoraAdapter is the Schema for the loraadapters API. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: LoraAdapterSpec defines the desired state of LoraAdapter. + properties: + adapterSource: + description: AdapterSource defines where to get the LoRA adapter from. + properties: + adapterName: + description: AdapterName is the name of the adapter to apply. + type: string + adapterPath: + description: 'AdapterPath is the path to the LoRA adapter weights. + For local sources: required, specifies the path to the adapter + For remote sources: optional, will be updated by the controller + with the download path' + type: string + credentialsSecretRef: + description: CredentialsSecretRef references a secret containing + storage credentials. 
+ properties: + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + type: object + x-kubernetes-map-type: atomic + maxAdapters: + description: MaxAdapters is the maximum number of adapters to + load. + format: int32 + type: integer + pattern: + description: Pattern is the pattern to use for the adapter name. + type: string + repository: + description: Repository is the repository to get the LoRA adapter + from. + type: string + type: + description: Type is the type of the adapter source. + enum: + - local + - s3 + - http + - huggingface + type: string + required: + - adapterName + - type + type: object + baseModel: + description: BaseModel is the name of the base model this adapter + is for. + type: string + loraAdapterDeploymentConfig: + description: DeploymentConfig defines how the adapter should be deployed + properties: + algorithm: + default: default + description: Algorithm specifies which placement algorithm to + use. + enum: + - default + - ordered + - equalized + type: string + replicas: + description: Replicas is the number of replicas that should load + this adapter. + format: int32 + minimum: 0 + type: integer + required: + - algorithm + type: object + vllmApiKey: + description: VLLMApiKey defines the configuration for vLLM API key + authentication + properties: + secretRef: + description: Reference to a secret containing the API key + properties: + secretKey: + description: Key in the secret containing the API key + type: string + secretName: + description: Name of the secret + type: string + required: + - secretKey + - secretName + type: object + value: + description: Direct API key value + type: string + type: object + required: + - adapterSource + - baseModel + type: object + status: + description: LoraAdapterStatus defines the observed state of LoraAdapter. 
+ properties: + conditions: + description: Condition contains details for one aspect of the current + state of this API Resource. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: LastTransitionTime is the last time the condition + transitioned from one status to another. + format: date-time + type: string + message: + description: Message is a human-readable message indicating + details about why the current state is set. + maxLength: 32768 + type: string + reason: + description: Reason is a brief reason for the condition's current + status. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: Status is the status of the condition. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + loadedAdapters: + description: LoadedAdapters tracks the loading status of adapters + and their pod assignments. 
+ items: + description: LoadedAdapter represents an adapter that has been loaded + into a pod + properties: + loadTime: + description: LoadTime is when the adapter was loaded + format: date-time + type: string + name: + description: Name is the name of the adapter + type: string + path: + description: Path is the path where the adapter is loaded + type: string + podAssignments: + description: PodAssignments represents the pods this adapter + has been assigned to + properties: + namespace: + description: Namespace is the namespace of the pod + type: string + podName: + description: Pod represents the pod information + type: string + required: + - namespace + - podName + type: object + status: + description: Status is the status of the adapter + type: string + required: + - name + - path + - podAssignments + - status + type: object + type: array + message: + description: Message provides additional information about the current + phase. + type: string + observedGeneration: + description: ObservedGeneration represents the .metadata.generation + that the condition was set based upon. + format: int64 + minimum: 0 + type: integer + phase: + description: Phase represents the current phase of the adapter deployment. + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.17.2 + name: vllmrouters.production-stack.vllm.ai +spec: + group: production-stack.vllm.ai + names: + kind: VLLMRouter + listKind: VLLMRouterList + plural: vllmrouters + singular: vllmrouter + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: VLLMRouter is the Schema for the vllmrouters API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. 
+ Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: VLLMRouterSpec defines the desired state of VLLMRouter + properties: + enableRouter: + default: true + description: EnableRouter determines if the router should be deployed + type: boolean + engineScrapeInterval: + description: EngineScrapeInterval for collecting engine statistics + format: int32 + type: integer + env: + description: Environment variables + items: + description: EnvVar represents an environment variable + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + extraArgs: + description: ExtraArgs for additional router arguments + items: + type: string + type: array + image: + description: Image configuration + properties: + name: + type: string + pullPolicy: + type: string + pullSecretName: + type: string + registry: + type: string + required: + - name + - registry + type: object + k8sLabelSelector: + description: K8sLabelSelector specifies the label selector for vLLM + runtime pods when using k8s service discovery + type: string + nodeSelectorTerms: + description: NodeSelectorTerms for pod scheduling + items: + description: |- + A null or empty node selector term matches no objects. The requirements of + them are ANDed. + The TopologySelectorTerm type implements a subset of the NodeSelectorTerm. 
+ properties: + matchExpressions: + description: A list of node selector requirements by node's + labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: A list of node selector requirements by node's + fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. 
+ items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + type: array + port: + default: 80 + description: ContainerPort for the router service + format: int32 + type: integer + replicas: + default: 1 + description: Replicas specifies the number of router replicas + format: int32 + type: integer + requestStatsWindow: + description: RequestStatsWindow for request statistics + format: int32 + type: integer + resources: + description: Resource requirements + properties: + cpu: + type: string + gpu: + type: string + memory: + type: string + type: object + routingLogic: + default: roundrobin + description: RoutingLogic specifies the routing strategy + enum: + - roundrobin + - session + type: string + serviceAccountName: + description: ServiceAccountName for the router pod + type: string + serviceDiscovery: + default: k8s + description: ServiceDiscovery specifies the service discovery method + (k8s or static) + enum: + - k8s + - static + type: string + sessionKey: + default: "" + description: SessionKey for session-based routing + type: string + staticBackends: + description: StaticBackends is required when using static service + discovery + type: string + staticModels: + description: StaticModels is required when using static service discovery + type: string + vllmApiKeyName: + type: string + vllmApiKeySecret: + description: VLLM API Key configuration + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + required: + - image + - resources + type: object + status: + description: VLLMRouterStatus defines the observed state of VLLMRouter + properties: + activeRuntimes: + description: Number of active runtimes + format: int32 + type: integer + lastUpdated: + description: Last updated timestamp + format: date-time + type: string + status: + description: Router status + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.17.2 + name: vllmruntimes.production-stack.vllm.ai +spec: + group: production-stack.vllm.ai + names: + kind: VLLMRuntime + listKind: VLLMRuntimeList + plural: vllmruntimes + shortNames: + - vr + singular: vllmruntime + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: VLLMRuntime is the Schema for the vllmruntimes API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: VLLMRuntimeSpec defines the desired state of VLLMRuntime + properties: + deploymentConfig: + description: Deployment configuration + properties: + deploymentStrategy: + default: RollingUpdate + description: Deploy strategy + enum: + - RollingUpdate + - Recreate + type: string + image: + description: Image configuration + properties: + name: + type: string + pullPolicy: + type: string + pullSecretName: + type: string + registry: + type: string + required: + - name + - registry + type: object + replicas: + default: 1 + description: Replicas + format: int32 + type: integer + resources: + description: Resource requirements + properties: + cpu: + type: string + gpu: + type: string + memory: + type: string + type: object + required: + - image + - resources + type: object + lmCacheConfig: + description: LM Cache configuration + properties: + cpuOffloadingBufferSize: + default: 4Gi + description: CPUOffloadingBufferSize is the size of the CPU offloading + buffer + type: string + diskOffloadingBufferSize: + default: 8Gi + description: DiskOffloadingBufferSize is the size of the disk + offloading buffer + type: string + enabled: + default: false + description: Enabled enables LM Cache + type: boolean + remoteSerde: + description: RemoteSerde is the serialization format for the remote + cache + type: string + remoteUrl: + description: RemoteURL is the URL of the remote cache server + type: string + type: object + model: + description: Model configuration + properties: + dtype: + description: Data type + type: string + enableLoRA: + description: Enable LoRA + type: boolean + enableTool: + description: Enable tool + type: boolean + hfTokenName: + default: token + type: string + hfTokenSecret: + description: HuggingFace token secret + properties: + name: + default: "" + description: |- + Name of the 
referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + maxModelLen: + description: Maximum model length + format: int32 + type: integer + maxNumSeqs: + description: Maximum number of sequences + format: int32 + type: integer + modelURL: + description: Model URL + type: string + toolCallParser: + description: Tool call parser + type: string + required: + - modelURL + type: object + vllmConfig: + description: vLLM server configuration + properties: + enableChunkedPrefill: + description: Enable chunked prefill + type: boolean + enablePrefixCaching: + description: Enable prefix caching + type: boolean + env: + description: Environment variables + items: + description: EnvVar represents an environment variable + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + extraArgs: + description: Extra arguments for vllm serve + items: + type: string + type: array + gpuMemoryUtilization: + description: GPU memory utilization + type: string + maxLoras: + description: Maximum number of LoRAs + format: int32 + type: integer + port: + default: 8000 + description: Port for vLLM server + format: int32 + type: integer + tensorParallelSize: + description: Tensor parallel size + format: int32 + type: integer + v1: + description: Use V1 API + type: boolean + type: object + required: + - deploymentConfig + - model + - vllmConfig + type: object + status: + description: VLLMRuntimeStatus defines the observed state of VLLMRuntime + properties: + lastUpdated: + description: Last updated timestamp + format: date-time + type: string + modelStatus: + description: Model status + type: string + type: object + type: object + served: true + storage: 
true + subresources: + status: {} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: production-stack + name: production-stack-controller-manager + namespace: production-stack-system +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: production-stack + name: production-stack-vllmrouter-sa + namespace: production-stack-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: production-stack + name: production-stack-leader-election-role + namespace: production-stack-system +rules: +- apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: production-stack + name: production-stack-pod-viewer-role + namespace: production-stack-system +rules: +- apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: production-stack + name: production-stack-cacheserver-admin-role +rules: +- apiGroups: + - production-stack.vllm.ai + resources: + - cacheservers + verbs: + - '*' +- apiGroups: + - production-stack.vllm.ai + resources: + - cacheservers/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: 
production-stack + name: production-stack-cacheserver-editor-role +rules: +- apiGroups: + - production-stack.vllm.ai + resources: + - cacheservers + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - production-stack.vllm.ai + resources: + - cacheservers/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: production-stack + name: production-stack-cacheserver-viewer-role +rules: +- apiGroups: + - production-stack.vllm.ai + resources: + - cacheservers + verbs: + - get + - list + - watch +- apiGroups: + - production-stack.vllm.ai + resources: + - cacheservers/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: production-stack + name: production-stack-loraadapter-admin-role +rules: +- apiGroups: + - production-stack.vllm.ai + resources: + - loraadapters + verbs: + - '*' +- apiGroups: + - production-stack.vllm.ai + resources: + - loraadapters/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: production-stack + name: production-stack-loraadapter-editor-role +rules: +- apiGroups: + - production-stack.vllm.ai + resources: + - loraadapters + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - production-stack.vllm.ai + resources: + - loraadapters/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: production-stack + name: production-stack-loraadapter-viewer-role +rules: +- apiGroups: + - production-stack.vllm.ai + resources: + - loraadapters + verbs: + - get + - list + - watch +- 
apiGroups: + - production-stack.vllm.ai + resources: + - loraadapters/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: production-stack-manager-role +rules: +- apiGroups: + - "" + resources: + - configmaps + - secrets + - services + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch +- apiGroups: + - apps + resources: + - deployments + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - production-stack.vllm.ai + resources: + - cacheservers + - loraadapters + - vllmrouters + - vllmruntimes + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - production-stack.vllm.ai + resources: + - cacheservers/finalizers + - loraadapters/finalizers + - vllmrouters/finalizers + - vllmruntimes/finalizers + verbs: + - update +- apiGroups: + - production-stack.vllm.ai + resources: + - cacheservers/status + - loraadapters/status + - vllmrouters/status + - vllmruntimes/status + verbs: + - get + - patch + - update +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: production-stack-metrics-auth-role +rules: +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: production-stack-metrics-reader +rules: +- nonResourceURLs: + - /metrics + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: production-stack + name: production-stack-vllmrouter-admin-role +rules: +- apiGroups: + - production-stack.vllm.ai + resources: + - vllmrouters + verbs: + - '*' +- apiGroups: + - 
production-stack.vllm.ai + resources: + - vllmrouters/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: production-stack + name: production-stack-vllmrouter-editor-role +rules: +- apiGroups: + - production-stack.vllm.ai + resources: + - vllmrouters + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - production-stack.vllm.ai + resources: + - vllmrouters/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: production-stack + name: production-stack-vllmrouter-viewer-role +rules: +- apiGroups: + - production-stack.vllm.ai + resources: + - vllmrouters + verbs: + - get + - list + - watch +- apiGroups: + - production-stack.vllm.ai + resources: + - vllmrouters/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: production-stack + name: production-stack-vllmruntime-admin-role +rules: +- apiGroups: + - production-stack.vllm.ai + resources: + - vllmruntimes + verbs: + - '*' +- apiGroups: + - production-stack.vllm.ai + resources: + - vllmruntimes/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: production-stack + name: production-stack-vllmruntime-editor-role +rules: +- apiGroups: + - production-stack.vllm.ai + resources: + - vllmruntimes + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - production-stack.vllm.ai + resources: + - vllmruntimes/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/managed-by: 
kustomize + app.kubernetes.io/name: production-stack + name: production-stack-vllmruntime-viewer-role +rules: +- apiGroups: + - production-stack.vllm.ai + resources: + - vllmruntimes + verbs: + - get + - list + - watch +- apiGroups: + - production-stack.vllm.ai + resources: + - vllmruntimes/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: production-stack + name: production-stack-leader-election-rolebinding + namespace: production-stack-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: production-stack-leader-election-role +subjects: +- kind: ServiceAccount + name: production-stack-controller-manager + namespace: production-stack-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: production-stack + name: production-stack-pod-viewer-binding + namespace: production-stack-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: production-stack-pod-viewer-role +subjects: +- kind: ServiceAccount + name: production-stack-vllmrouter-sa + namespace: production-stack-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: production-stack + name: production-stack-manager-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: production-stack-manager-role +subjects: +- kind: ServiceAccount + name: production-stack-controller-manager + namespace: production-stack-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: production-stack-metrics-auth-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: production-stack-metrics-auth-role +subjects: +- kind: ServiceAccount + 
name: production-stack-controller-manager + namespace: production-stack-system +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: production-stack + control-plane: controller-manager + name: production-stack-controller-manager-metrics-service + namespace: production-stack-system +spec: + ports: + - name: https + port: 8443 + protocol: TCP + targetPort: 8443 + selector: + app.kubernetes.io/name: production-stack + control-plane: controller-manager +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app.kubernetes.io/component: manager + app.kubernetes.io/created-by: production-stack + app.kubernetes.io/instance: production-stack + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: production-stack + app.kubernetes.io/part-of: production-stack + name: production-stack-production-stack-controller-manager + namespace: production-stack-system +spec: + selector: + matchLabels: + app.kubernetes.io/component: manager + app.kubernetes.io/instance: production-stack + app.kubernetes.io/name: production-stack + template: + metadata: + labels: + app.kubernetes.io/component: manager + app.kubernetes.io/instance: production-stack + app.kubernetes.io/name: production-stack + spec: + containers: + - args: + - --metrics-bind-address=:8443 + - --leader-elect + - --health-probe-bind-address=:8081 + command: + - /manager + image: 1nfinity/production-stack-controller:latest + imagePullPolicy: Always + livenessProbe: + httpGet: + path: /healthz + port: 8081 + initialDelaySeconds: 15 + periodSeconds: 20 + name: manager + ports: [] + readinessProbe: + httpGet: + path: /readyz + port: 8081 + initialDelaySeconds: 5 + periodSeconds: 10 + resources: + limits: + cpu: 500m + memory: 128Mi + requests: + cpu: 10m + memory: 64Mi + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + volumeMounts: [] + securityContext: + runAsNonRoot: true + seccompProfile: + type: 
RuntimeDefault + serviceAccountName: production-stack-controller-manager + terminationGracePeriodSeconds: 10 + volumes: [] From f5c9b28702b4eeb13f99f579ae8751a0081076c0 Mon Sep 17 00:00:00 2001 From: royyhuang Date: Fri, 6 Jun 2025 06:00:28 +0000 Subject: [PATCH 10/14] move rbac for vllm-router pod to controller Signed-off-by: royyhuang --- operator/{ => config}/default.yaml | 58 +-- operator/config/default/kustomization.yaml | 414 +++++++++--------- operator/config/rbac/kustomization.yaml | 2 - operator/config/rbac/role.yaml | 152 ++++--- operator/config/rbac/role_binding.yaml | 6 +- .../config/rbac/vllmrouter_role_binding.yaml | 43 -- .../controller/vllmrouter_controller.go | 125 ++++++ 7 files changed, 432 insertions(+), 368 deletions(-) rename operator/{ => config}/default.yaml (97%) delete mode 100644 operator/config/rbac/vllmrouter_role_binding.yaml diff --git a/operator/default.yaml b/operator/config/default.yaml similarity index 97% rename from operator/default.yaml rename to operator/config/default.yaml index 848c64670..85242a851 100644 --- a/operator/default.yaml +++ b/operator/config/default.yaml @@ -864,15 +864,6 @@ metadata: name: production-stack-controller-manager namespace: production-stack-system --- -apiVersion: v1 -kind: ServiceAccount -metadata: - labels: - app.kubernetes.io/managed-by: kustomize - app.kubernetes.io/name: production-stack - name: production-stack-vllmrouter-sa - namespace: production-stack-system ---- apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: @@ -915,24 +906,6 @@ rules: - patch --- apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - labels: - app.kubernetes.io/managed-by: kustomize - app.kubernetes.io/name: production-stack - name: production-stack-pod-viewer-role - namespace: production-stack-system -rules: -- apiGroups: - - "" - resources: - - pods - verbs: - - get - - list - - watch ---- -apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: labels: @@ -1084,6 +1057,7 @@ rules: 
resources: - configmaps - secrets + - serviceaccounts - services verbs: - create @@ -1148,6 +1122,19 @@ rules: - get - patch - update +- apiGroups: + - rbac.authorization.k8s.io + resources: + - rolebindings + - roles + verbs: + - create + - delete + - get + - list + - patch + - update + - watch --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole @@ -1337,23 +1324,6 @@ subjects: namespace: production-stack-system --- apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - labels: - app.kubernetes.io/managed-by: kustomize - app.kubernetes.io/name: production-stack - name: production-stack-pod-viewer-binding - namespace: production-stack-system -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: production-stack-pod-viewer-role -subjects: -- kind: ServiceAccount - name: production-stack-vllmrouter-sa - namespace: production-stack-system ---- -apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: labels: diff --git a/operator/config/default/kustomization.yaml b/operator/config/default/kustomization.yaml index db3e47347..a4089b35f 100644 --- a/operator/config/default/kustomization.yaml +++ b/operator/config/default/kustomization.yaml @@ -15,220 +15,220 @@ namePrefix: production-stack- # someName: someValue resources: -- ../crd -- ../rbac -- ../manager -# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in -# crd/kustomization.yaml -#- ../webhook -# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required. -#- ../certmanager -# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. -#- ../prometheus -# [METRICS] Expose the controller manager metrics service. 
-- metrics_service.yaml + - ../crd + - ../rbac + - ../manager + # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in + # crd/kustomization.yaml + #- ../webhook + # [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required. + #- ../certmanager + # [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. + #- ../prometheus + # [METRICS] Expose the controller manager metrics service. + - metrics_service.yaml # [NETWORK POLICY] Protect the /metrics endpoint and Webhook Server with NetworkPolicy. # Only Pod(s) running a namespace labeled with 'metrics: enabled' will be able to gather the metrics. # Only CR(s) which requires webhooks and are applied on namespaces labeled with 'webhooks: enabled' will # be able to communicate with the Webhook Server. #- ../network-policy -# Uncomment the patches line if you enable Metrics +# Patches applied to the resources listed above patches: -# [METRICS] The following patch will enable the metrics endpoint using HTTPS and the port :8443. -# More info: https://book.kubebuilder.io/reference/metrics -- path: manager_metrics_patch.yaml - target: - kind: Deployment + # [METRICS] The following patch will enable the metrics endpoint using HTTPS and the port :8443. + # More info: https://book.kubebuilder.io/reference/metrics + - path: manager_metrics_patch.yaml + target: + kind: Deployment -# Uncomment the patches line if you enable Metrics and CertManager -# [METRICS-WITH-CERTS] To enable metrics protected with certManager, uncomment the following line. -# This patch will protect the metrics with certManager self-signed certs. -#- path: cert_metrics_manager_patch.yaml -# target: -# kind: Deployment + # Uncomment the patches line if you enable Metrics and CertManager + # [METRICS-WITH-CERTS] To enable metrics protected with certManager, uncomment the following line.
+ # This patch will protect the metrics with certManager self-signed certs. + #- path: cert_metrics_manager_patch.yaml + # target: + # kind: Deployment -# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in -# crd/kustomization.yaml -#- path: manager_webhook_patch.yaml -# target: -# kind: Deployment + # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in + # crd/kustomization.yaml + #- path: manager_webhook_patch.yaml + # target: + # kind: Deployment -# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix. -# Uncomment the following replacements to add the cert-manager CA injection annotations -#replacements: -# - source: # Uncomment the following block to enable certificates for metrics -# kind: Service -# version: v1 -# name: controller-manager-metrics-service -# fieldPath: metadata.name -# targets: -# - select: -# kind: Certificate -# group: cert-manager.io -# version: v1 -# name: metrics-certs -# fieldPaths: -# - spec.dnsNames.0 -# - spec.dnsNames.1 -# options: -# delimiter: '.' -# index: 0 -# create: true -# - select: # Uncomment the following to set the Service name for TLS config in Prometheus ServiceMonitor -# kind: ServiceMonitor -# group: monitoring.coreos.com -# version: v1 -# name: controller-manager-metrics-monitor -# fieldPaths: -# - spec.endpoints.0.tlsConfig.serverName -# options: -# delimiter: '.' -# index: 0 -# create: true -# -# - source: -# kind: Service -# version: v1 -# name: controller-manager-metrics-service -# fieldPath: metadata.namespace -# targets: -# - select: -# kind: Certificate -# group: cert-manager.io -# version: v1 -# name: metrics-certs -# fieldPaths: -# - spec.dnsNames.0 -# - spec.dnsNames.1 -# options: -# delimiter: '.' 
-# index: 1 -# create: true -# - select: # Uncomment the following to set the Service namespace for TLS in Prometheus ServiceMonitor -# kind: ServiceMonitor -# group: monitoring.coreos.com -# version: v1 -# name: controller-manager-metrics-monitor -# fieldPaths: -# - spec.endpoints.0.tlsConfig.serverName -# options: -# delimiter: '.' -# index: 1 -# create: true -# -# - source: # Uncomment the following block if you have any webhook -# kind: Service -# version: v1 -# name: webhook-service -# fieldPath: .metadata.name # Name of the service -# targets: -# - select: -# kind: Certificate -# group: cert-manager.io -# version: v1 -# name: serving-cert -# fieldPaths: -# - .spec.dnsNames.0 -# - .spec.dnsNames.1 -# options: -# delimiter: '.' -# index: 0 -# create: true -# - source: -# kind: Service -# version: v1 -# name: webhook-service -# fieldPath: .metadata.namespace # Namespace of the service -# targets: -# - select: -# kind: Certificate -# group: cert-manager.io -# version: v1 -# name: serving-cert -# fieldPaths: -# - .spec.dnsNames.0 -# - .spec.dnsNames.1 -# options: -# delimiter: '.' 
-# index: 1 -# create: true -# -# - source: # Uncomment the following block if you have a ValidatingWebhook (--programmatic-validation) -# kind: Certificate -# group: cert-manager.io -# version: v1 -# name: serving-cert # This name should match the one in certificate.yaml -# fieldPath: .metadata.namespace # Namespace of the certificate CR -# targets: -# - select: -# kind: ValidatingWebhookConfiguration -# fieldPaths: -# - .metadata.annotations.[cert-manager.io/inject-ca-from] -# options: -# delimiter: '/' -# index: 0 -# create: true -# - source: -# kind: Certificate -# group: cert-manager.io -# version: v1 -# name: serving-cert -# fieldPath: .metadata.name -# targets: -# - select: -# kind: ValidatingWebhookConfiguration -# fieldPaths: -# - .metadata.annotations.[cert-manager.io/inject-ca-from] -# options: -# delimiter: '/' -# index: 1 -# create: true -# -# - source: # Uncomment the following block if you have a DefaultingWebhook (--defaulting ) -# kind: Certificate -# group: cert-manager.io -# version: v1 -# name: serving-cert -# fieldPath: .metadata.namespace # Namespace of the certificate CR -# targets: -# - select: -# kind: MutatingWebhookConfiguration -# fieldPaths: -# - .metadata.annotations.[cert-manager.io/inject-ca-from] -# options: -# delimiter: '/' -# index: 0 -# create: true -# - source: -# kind: Certificate -# group: cert-manager.io -# version: v1 -# name: serving-cert -# fieldPath: .metadata.name -# targets: -# - select: -# kind: MutatingWebhookConfiguration -# fieldPaths: -# - .metadata.annotations.[cert-manager.io/inject-ca-from] -# options: -# delimiter: '/' -# index: 1 -# create: true -# -# - source: # Uncomment the following block if you have a ConversionWebhook (--conversion) -# kind: Certificate -# group: cert-manager.io -# version: v1 -# name: serving-cert -# fieldPath: .metadata.namespace # Namespace of the certificate CR -# targets: # Do not remove or uncomment the following scaffold marker; required to generate code for target CRD. 
-# +kubebuilder:scaffold:crdkustomizecainjectionns -# - source: -# kind: Certificate -# group: cert-manager.io -# version: v1 -# name: serving-cert -# fieldPath: .metadata.name -# targets: # Do not remove or uncomment the following scaffold marker; required to generate code for target CRD. -# +kubebuilder:scaffold:crdkustomizecainjectionname + # [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix. + # Uncomment the following replacements to add the cert-manager CA injection annotations + #replacements: + # - source: # Uncomment the following block to enable certificates for metrics + # kind: Service + # version: v1 + # name: controller-manager-metrics-service + # fieldPath: metadata.name + # targets: + # - select: + # kind: Certificate + # group: cert-manager.io + # version: v1 + # name: metrics-certs + # fieldPaths: + # - spec.dnsNames.0 + # - spec.dnsNames.1 + # options: + # delimiter: '.' + # index: 0 + # create: true + # - select: # Uncomment the following to set the Service name for TLS config in Prometheus ServiceMonitor + # kind: ServiceMonitor + # group: monitoring.coreos.com + # version: v1 + # name: controller-manager-metrics-monitor + # fieldPaths: + # - spec.endpoints.0.tlsConfig.serverName + # options: + # delimiter: '.' + # index: 0 + # create: true + # + # - source: + # kind: Service + # version: v1 + # name: controller-manager-metrics-service + # fieldPath: metadata.namespace + # targets: + # - select: + # kind: Certificate + # group: cert-manager.io + # version: v1 + # name: metrics-certs + # fieldPaths: + # - spec.dnsNames.0 + # - spec.dnsNames.1 + # options: + # delimiter: '.' 
+ # index: 1 + # create: true + # - select: # Uncomment the following to set the Service namespace for TLS in Prometheus ServiceMonitor + # kind: ServiceMonitor + # group: monitoring.coreos.com + # version: v1 + # name: controller-manager-metrics-monitor + # fieldPaths: + # - spec.endpoints.0.tlsConfig.serverName + # options: + # delimiter: '.' + # index: 1 + # create: true + # + # - source: # Uncomment the following block if you have any webhook + # kind: Service + # version: v1 + # name: webhook-service + # fieldPath: .metadata.name # Name of the service + # targets: + # - select: + # kind: Certificate + # group: cert-manager.io + # version: v1 + # name: serving-cert + # fieldPaths: + # - .spec.dnsNames.0 + # - .spec.dnsNames.1 + # options: + # delimiter: '.' + # index: 0 + # create: true + # - source: + # kind: Service + # version: v1 + # name: webhook-service + # fieldPath: .metadata.namespace # Namespace of the service + # targets: + # - select: + # kind: Certificate + # group: cert-manager.io + # version: v1 + # name: serving-cert + # fieldPaths: + # - .spec.dnsNames.0 + # - .spec.dnsNames.1 + # options: + # delimiter: '.' 
+ # index: 1 + # create: true + # + # - source: # Uncomment the following block if you have a ValidatingWebhook (--programmatic-validation) + # kind: Certificate + # group: cert-manager.io + # version: v1 + # name: serving-cert # This name should match the one in certificate.yaml + # fieldPath: .metadata.namespace # Namespace of the certificate CR + # targets: + # - select: + # kind: ValidatingWebhookConfiguration + # fieldPaths: + # - .metadata.annotations.[cert-manager.io/inject-ca-from] + # options: + # delimiter: '/' + # index: 0 + # create: true + # - source: + # kind: Certificate + # group: cert-manager.io + # version: v1 + # name: serving-cert + # fieldPath: .metadata.name + # targets: + # - select: + # kind: ValidatingWebhookConfiguration + # fieldPaths: + # - .metadata.annotations.[cert-manager.io/inject-ca-from] + # options: + # delimiter: '/' + # index: 1 + # create: true + # + # - source: # Uncomment the following block if you have a DefaultingWebhook (--defaulting ) + # kind: Certificate + # group: cert-manager.io + # version: v1 + # name: serving-cert + # fieldPath: .metadata.namespace # Namespace of the certificate CR + # targets: + # - select: + # kind: MutatingWebhookConfiguration + # fieldPaths: + # - .metadata.annotations.[cert-manager.io/inject-ca-from] + # options: + # delimiter: '/' + # index: 0 + # create: true + # - source: + # kind: Certificate + # group: cert-manager.io + # version: v1 + # name: serving-cert + # fieldPath: .metadata.name + # targets: + # - select: + # kind: MutatingWebhookConfiguration + # fieldPaths: + # - .metadata.annotations.[cert-manager.io/inject-ca-from] + # options: + # delimiter: '/' + # index: 1 + # create: true + # + # - source: # Uncomment the following block if you have a ConversionWebhook (--conversion) + # kind: Certificate + # group: cert-manager.io + # version: v1 + # name: serving-cert + # fieldPath: .metadata.namespace # Namespace of the certificate CR + # targets: # Do not remove or uncomment the 
following scaffold marker; required to generate code for target CRD. + # +kubebuilder:scaffold:crdkustomizecainjectionns + # - source: + # kind: Certificate + # group: cert-manager.io + # version: v1 + # name: serving-cert + # fieldPath: .metadata.name + # targets: # Do not remove or uncomment the following scaffold marker; required to generate code for target CRD. + # +kubebuilder:scaffold:crdkustomizecainjectionname diff --git a/operator/config/rbac/kustomization.yaml b/operator/config/rbac/kustomization.yaml index 54248b6f0..9f20ab9e2 100644 --- a/operator/config/rbac/kustomization.yaml +++ b/operator/config/rbac/kustomization.yaml @@ -34,5 +34,3 @@ resources: - vllmruntime_admin_role.yaml - vllmruntime_editor_role.yaml - vllmruntime_viewer_role.yaml - # Pod viewer role is used to view pods for vllmrouter's service discovery - - vllmrouter_role_binding.yaml diff --git a/operator/config/rbac/role.yaml b/operator/config/rbac/role.yaml index 9dedd14d2..8803b3f5c 100644 --- a/operator/config/rbac/role.yaml +++ b/operator/config/rbac/role.yaml @@ -4,72 +4,86 @@ kind: ClusterRole metadata: name: manager-role rules: -- apiGroups: - - "" - resources: - - configmaps - - secrets - - services - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - "" - resources: - - pods - verbs: - - get - - list - - watch -- apiGroups: - - apps - resources: - - deployments - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - production-stack.vllm.ai - resources: - - cacheservers - - loraadapters - - vllmrouters - - vllmruntimes - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - production-stack.vllm.ai - resources: - - cacheservers/finalizers - - loraadapters/finalizers - - vllmrouters/finalizers - - vllmruntimes/finalizers - verbs: - - update -- apiGroups: - - production-stack.vllm.ai - resources: - - cacheservers/status - - loraadapters/status - - 
vllmrouters/status - - vllmruntimes/status - verbs: - - get - - patch - - update + - apiGroups: + - "" + resources: + - configmaps + - secrets + - serviceaccounts + - services + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch + - apiGroups: + - apps + resources: + - deployments + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - production-stack.vllm.ai + resources: + - cacheservers + - loraadapters + - vllmrouters + - vllmruntimes + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - production-stack.vllm.ai + resources: + - cacheservers/finalizers + - loraadapters/finalizers + - vllmrouters/finalizers + - vllmruntimes/finalizers + verbs: + - update + - apiGroups: + - production-stack.vllm.ai + resources: + - cacheservers/status + - loraadapters/status + - vllmrouters/status + - vllmruntimes/status + verbs: + - get + - patch + - update + - apiGroups: + - rbac.authorization.k8s.io + resources: + - rolebindings + - roles + verbs: + - create + - delete + - get + - list + - patch + - update + - watch diff --git a/operator/config/rbac/role_binding.yaml b/operator/config/rbac/role_binding.yaml index b61dbe83f..ab3cc7b55 100644 --- a/operator/config/rbac/role_binding.yaml +++ b/operator/config/rbac/role_binding.yaml @@ -10,6 +10,6 @@ roleRef: kind: ClusterRole name: manager-role subjects: -- kind: ServiceAccount - name: controller-manager - namespace: system + - kind: ServiceAccount + name: controller-manager + namespace: system diff --git a/operator/config/rbac/vllmrouter_role_binding.yaml b/operator/config/rbac/vllmrouter_role_binding.yaml deleted file mode 100644 index 2807c765e..000000000 --- a/operator/config/rbac/vllmrouter_role_binding.yaml +++ /dev/null @@ -1,43 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: pod-viewer-role - 
namespace: default - labels: - app.kubernetes.io/name: production-stack - app.kubernetes.io/managed-by: kustomize -rules: - - apiGroups: - - "" - resources: - - pods - verbs: - - get - - list - - watch ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: vllmrouter-sa - namespace: default - labels: - app.kubernetes.io/name: production-stack - app.kubernetes.io/managed-by: kustomize ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: pod-viewer-binding - namespace: default - labels: - app.kubernetes.io/name: production-stack - app.kubernetes.io/managed-by: kustomize -subjects: - - kind: ServiceAccount - name: vllmrouter-sa - namespace: default -roleRef: - kind: Role - name: pod-viewer-role - apiGroup: rbac.authorization.k8s.io diff --git a/operator/internal/controller/vllmrouter_controller.go b/operator/internal/controller/vllmrouter_controller.go index 3bd198bf7..793ffb0f8 100644 --- a/operator/internal/controller/vllmrouter_controller.go +++ b/operator/internal/controller/vllmrouter_controller.go @@ -23,6 +23,7 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -51,6 +52,9 @@ type VLLMRouterReconciler struct { // +kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=core,resources=services,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=roles,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=rolebindings,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=core,resources=serviceaccounts,verbs=get;list;watch;create;update;patch;delete // 
Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. @@ -72,6 +76,60 @@ func (r *VLLMRouterReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, err } + // Create ServiceAccount if it doesn't exist + sa := &corev1.ServiceAccount{} + err = r.Get(ctx, types.NamespacedName{Name: "vllmrouter-sa", Namespace: router.Namespace}, sa) + if err != nil && errors.IsNotFound(err) { + sa = r.serviceAccountForVLLMRouter(router) + log.Info("Creating a new ServiceAccount", "ServiceAccount.Namespace", sa.Namespace, "ServiceAccount.Name", sa.Name) + err = r.Create(ctx, sa) + if err != nil { + log.Error(err, "Failed to create new ServiceAccount", "ServiceAccount.Namespace", sa.Namespace, "ServiceAccount.Name", sa.Name) + return ctrl.Result{}, err + } + // ServiceAccount created successfully - return and requeue + return ctrl.Result{Requeue: true}, nil + } else if err != nil { + log.Error(err, "Failed to get ServiceAccount") + return ctrl.Result{}, err + } + + // Create Role if it doesn't exist + role := &rbacv1.Role{} + err = r.Get(ctx, types.NamespacedName{Name: "pod-viewer-role", Namespace: router.Namespace}, role) + if err != nil && errors.IsNotFound(err) { + role = r.roleForVLLMRouter(router) + log.Info("Creating a new Role", "Role.Namespace", role.Namespace, "Role.Name", role.Name) + err = r.Create(ctx, role) + if err != nil { + log.Error(err, "Failed to create new Role", "Role.Namespace", role.Namespace, "Role.Name", role.Name) + return ctrl.Result{}, err + } + // Role created successfully - return and requeue + return ctrl.Result{Requeue: true}, nil + } else if err != nil { + log.Error(err, "Failed to get Role") + return ctrl.Result{}, err + } + + // Create RoleBinding if it doesn't exist + roleBinding := &rbacv1.RoleBinding{} + err = r.Get(ctx, types.NamespacedName{Name: "pod-viewer-binding", Namespace: router.Namespace}, roleBinding) + if err != nil && 
errors.IsNotFound(err) { + roleBinding = r.roleBindingForVLLMRouter(router) + log.Info("Creating a new RoleBinding", "RoleBinding.Namespace", roleBinding.Namespace, "RoleBinding.Name", roleBinding.Name) + err = r.Create(ctx, roleBinding) + if err != nil { + log.Error(err, "Failed to create new RoleBinding", "RoleBinding.Namespace", roleBinding.Namespace, "RoleBinding.Name", roleBinding.Name) + return ctrl.Result{}, err + } + // RoleBinding created successfully - return and requeue + return ctrl.Result{Requeue: true}, nil + } else if err != nil { + log.Error(err, "Failed to get RoleBinding") + return ctrl.Result{}, err + } + // Check if the service already exists, if not create a new one foundService := &corev1.Service{} err = r.Get(ctx, types.NamespacedName{Name: router.Name, Namespace: router.Namespace}, foundService) @@ -380,6 +438,73 @@ func (r *VLLMRouterReconciler) serviceForVLLMRouter(router *servingv1alpha1.VLLM return svc } +// serviceAccountForVLLMRouter returns a ServiceAccount object +func (r *VLLMRouterReconciler) serviceAccountForVLLMRouter(router *servingv1alpha1.VLLMRouter) *corev1.ServiceAccount { + sa := &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: "vllmrouter-sa", + Namespace: router.Namespace, + Labels: map[string]string{ + "app.kubernetes.io/name": "production-stack", + "app.kubernetes.io/managed-by": "kustomize", + }, + }, + } + ctrl.SetControllerReference(router, sa, r.Scheme) + return sa +} + +// roleForVLLMRouter returns a Role object +func (r *VLLMRouterReconciler) roleForVLLMRouter(router *servingv1alpha1.VLLMRouter) *rbacv1.Role { + role := &rbacv1.Role{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod-viewer-role", + Namespace: router.Namespace, + Labels: map[string]string{ + "app.kubernetes.io/name": "production-stack", + "app.kubernetes.io/managed-by": "kustomize", + }, + }, + Rules: []rbacv1.PolicyRule{ + { + APIGroups: []string{""}, + Resources: []string{"pods"}, + Verbs: []string{"get", "list", "watch"}, + }, + }, + 
} + ctrl.SetControllerReference(router, role, r.Scheme) + return role +} + +// roleBindingForVLLMRouter returns a RoleBinding object +func (r *VLLMRouterReconciler) roleBindingForVLLMRouter(router *servingv1alpha1.VLLMRouter) *rbacv1.RoleBinding { + roleBinding := &rbacv1.RoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod-viewer-binding", + Namespace: router.Namespace, + Labels: map[string]string{ + "app.kubernetes.io/name": "production-stack", + "app.kubernetes.io/managed-by": "kustomize", + }, + }, + Subjects: []rbacv1.Subject{ + { + Kind: "ServiceAccount", + Name: "vllmrouter-sa", + Namespace: router.Namespace, + }, + }, + RoleRef: rbacv1.RoleRef{ + Kind: "Role", + Name: "pod-viewer-role", + APIGroup: "rbac.authorization.k8s.io", + }, + } + ctrl.SetControllerReference(router, roleBinding, r.Scheme) + return roleBinding +} + // SetupWithManager sets up the controller with the Manager. func (r *VLLMRouterReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). 
From e984ecd3ac72644ed0fd847f9399f0e98ded7217 Mon Sep 17 00:00:00 2001 From: royyhuang Date: Fri, 6 Jun 2025 06:07:07 +0000 Subject: [PATCH 11/14] use service account name defined in vllm-router cr to create service account instead of hardcoding Signed-off-by: royyhuang --- operator/config/rbac/role.yaml | 166 +++++++++--------- .../controller/vllmrouter_controller.go | 6 +- 2 files changed, 86 insertions(+), 86 deletions(-) diff --git a/operator/config/rbac/role.yaml b/operator/config/rbac/role.yaml index 8803b3f5c..a2f2d5f46 100644 --- a/operator/config/rbac/role.yaml +++ b/operator/config/rbac/role.yaml @@ -4,86 +4,86 @@ kind: ClusterRole metadata: name: manager-role rules: - - apiGroups: - - "" - resources: - - configmaps - - secrets - - serviceaccounts - - services - verbs: - - create - - delete - - get - - list - - patch - - update - - watch - - apiGroups: - - "" - resources: - - pods - verbs: - - get - - list - - watch - - apiGroups: - - apps - resources: - - deployments - verbs: - - create - - delete - - get - - list - - patch - - update - - watch - - apiGroups: - - production-stack.vllm.ai - resources: - - cacheservers - - loraadapters - - vllmrouters - - vllmruntimes - verbs: - - create - - delete - - get - - list - - patch - - update - - watch - - apiGroups: - - production-stack.vllm.ai - resources: - - cacheservers/finalizers - - loraadapters/finalizers - - vllmrouters/finalizers - - vllmruntimes/finalizers - verbs: - - update - - apiGroups: - - production-stack.vllm.ai - resources: - - cacheservers/status - - loraadapters/status - - vllmrouters/status - - vllmruntimes/status - verbs: - - get - - patch - - update - - apiGroups: - - rbac.authorization.k8s.io - resources: - - rolebindings - - roles - verbs: - - create - - delete - - get - - list - - patch - - update - - watch +- apiGroups: + - "" + resources: + - configmaps + - secrets + - serviceaccounts + - services + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- 
apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch +- apiGroups: + - apps + resources: + - deployments + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - production-stack.vllm.ai + resources: + - cacheservers + - loraadapters + - vllmrouters + - vllmruntimes + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - production-stack.vllm.ai + resources: + - cacheservers/finalizers + - loraadapters/finalizers + - vllmrouters/finalizers + - vllmruntimes/finalizers + verbs: + - update +- apiGroups: + - production-stack.vllm.ai + resources: + - cacheservers/status + - loraadapters/status + - vllmrouters/status + - vllmruntimes/status + verbs: + - get + - patch + - update +- apiGroups: + - rbac.authorization.k8s.io + resources: + - rolebindings + - roles + verbs: + - create + - delete + - get + - list + - patch + - update + - watch diff --git a/operator/internal/controller/vllmrouter_controller.go b/operator/internal/controller/vllmrouter_controller.go index 793ffb0f8..91320091c 100644 --- a/operator/internal/controller/vllmrouter_controller.go +++ b/operator/internal/controller/vllmrouter_controller.go @@ -78,7 +78,7 @@ func (r *VLLMRouterReconciler) Reconcile(ctx context.Context, req ctrl.Request) // Create ServiceAccount if it doesn't exist sa := &corev1.ServiceAccount{} - err = r.Get(ctx, types.NamespacedName{Name: "vllmrouter-sa", Namespace: router.Namespace}, sa) + err = r.Get(ctx, types.NamespacedName{Name: router.Spec.ServiceAccountName, Namespace: router.Namespace}, sa) if err != nil && errors.IsNotFound(err) { sa = r.serviceAccountForVLLMRouter(router) log.Info("Creating a new ServiceAccount", "ServiceAccount.Namespace", sa.Namespace, "ServiceAccount.Name", sa.Name) @@ -442,7 +442,7 @@ func (r *VLLMRouterReconciler) serviceForVLLMRouter(router *servingv1alpha1.VLLM func (r *VLLMRouterReconciler) serviceAccountForVLLMRouter(router 
*servingv1alpha1.VLLMRouter) *corev1.ServiceAccount { sa := &corev1.ServiceAccount{ ObjectMeta: metav1.ObjectMeta{ - Name: "vllmrouter-sa", + Name: router.Spec.ServiceAccountName, Namespace: router.Namespace, Labels: map[string]string{ "app.kubernetes.io/name": "production-stack", @@ -491,7 +491,7 @@ func (r *VLLMRouterReconciler) roleBindingForVLLMRouter(router *servingv1alpha1. Subjects: []rbacv1.Subject{ { Kind: "ServiceAccount", - Name: "vllmrouter-sa", + Name: router.Spec.ServiceAccountName, Namespace: router.Namespace, }, }, From d7fa743c563b52657de65c98681aa9fd757a4d5f Mon Sep 17 00:00:00 2001 From: royyhuang Date: Sat, 7 Jun 2025 06:05:06 +0000 Subject: [PATCH 12/14] update sample loraadapter manifest Signed-off-by: royyhuang --- .../samples/production-stack_v1alpha1_loraadapter.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/operator/config/samples/production-stack_v1alpha1_loraadapter.yaml b/operator/config/samples/production-stack_v1alpha1_loraadapter.yaml index d83e0a847..960017ab8 100644 --- a/operator/config/samples/production-stack_v1alpha1_loraadapter.yaml +++ b/operator/config/samples/production-stack_v1alpha1_loraadapter.yaml @@ -17,9 +17,9 @@ spec: # vllmApiKey: # value: "abc123" adapterSource: - type: "local" # (local, huggingface, s3) for now we only support local - adapterName: "llama-3.1-nemoguard-8b-topic-control" # This will be the adapter ID + type: "local" # (local, huggingface, s3) for now we only support local + adapterName: "llama-3.1-nemoguard-8b-topic-control" # This will be the adapter ID adapterPath: "/data/lora-adapters/llama-3.1-nemoguard-8b-topic-control" # This will be the path to the adapter in the persistent volume - deploymentConfig: + loraAdapterDeploymentConfig: algorithm: "default" # for now we only support default algorithm replicas: 1 # if not specified, by default algorithm, the lora adapter will be applied to all llama3-8b models, if specified, the lora adapter will only be applied to the 
specified number of replicas From fed80a87f4f9dd1ad0467af26d7c8770cc784f99 Mon Sep 17 00:00:00 2001 From: royyhuang Date: Sat, 7 Jun 2025 06:27:23 +0000 Subject: [PATCH 13/14] update default controller image Signed-off-by: royyhuang --- operator/config/default.yaml | 2 +- operator/config/default/kustomization.yaml | 5 +++++ operator/config/default/manager_image_patch.yaml | 11 +++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 operator/config/default/manager_image_patch.yaml diff --git a/operator/config/default.yaml b/operator/config/default.yaml index 85242a851..5b71fb8ec 100644 --- a/operator/config/default.yaml +++ b/operator/config/default.yaml @@ -1403,7 +1403,7 @@ spec: - --health-probe-bind-address=:8081 command: - /manager - image: 1nfinity/production-stack-controller:latest + image: lmcache/production-stack-operator:latest imagePullPolicy: Always livenessProbe: httpGet: diff --git a/operator/config/default/kustomization.yaml b/operator/config/default/kustomization.yaml index a4089b35f..b092f90d4 100644 --- a/operator/config/default/kustomization.yaml +++ b/operator/config/default/kustomization.yaml @@ -41,6 +41,11 @@ patches: target: kind: Deployment + # Set the controller image + - path: manager_image_patch.yaml + target: + kind: Deployment + # Uncomment the patches line if you enable Metrics and CertManager # [METRICS-WITH-CERTS] To enable metrics protected with certManager, uncomment the following line. # This patch will protect the metrics with certManager self-signed certs. 
diff --git a/operator/config/default/manager_image_patch.yaml b/operator/config/default/manager_image_patch.yaml new file mode 100644 index 000000000..ec444896c --- /dev/null +++ b/operator/config/default/manager_image_patch.yaml @@ -0,0 +1,11 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: controller-manager + namespace: system +spec: + template: + spec: + containers: + - name: manager + image: lmcache/production-stack-operator:latest From c5793d33d02a518a1f1b6f5bb05323ea7dda1d04 Mon Sep 17 00:00:00 2001 From: royyhuang Date: Sat, 7 Jun 2025 06:30:35 +0000 Subject: [PATCH 14/14] remove old image name patch Signed-off-by: royyhuang --- operator/config/default.yaml | 2229 ++++++++++---------- operator/config/manager/kustomization.yaml | 10 +- 2 files changed, 1130 insertions(+), 1109 deletions(-) diff --git a/operator/config/default.yaml b/operator/config/default.yaml index 5b71fb8ec..cce3f977a 100644 --- a/operator/config/default.yaml +++ b/operator/config/default.yaml @@ -22,103 +22,103 @@ spec: singular: cacheserver scope: Namespaced versions: - - additionalPrinterColumns: - - jsonPath: .status.status - name: Status - type: string - - jsonPath: .metadata.creationTimestamp - name: Age - type: date - name: v1alpha1 - schema: - openAPIV3Schema: - description: CacheServer is the Schema for the cacheservers API - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. 
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: CacheServerSpec defines the desired state of CacheServer - properties: - deploymentStrategy: - default: RollingUpdate - description: Deployment strategy - enum: - - RollingUpdate - - Recreate - type: string - image: - description: Image configuration for the cache server - properties: - name: - type: string - pullPolicy: - type: string - pullSecretName: - type: string - registry: - type: string - required: - - name - - registry - type: object - port: - default: 8000 - description: Container port for the cache server - format: int32 - type: integer - replicas: - default: 1 - description: Number of replicas - format: int32 - type: integer - resources: - description: Resource requirements - properties: - cpu: - type: string - gpu: - type: string - memory: - type: string - type: object - required: - - deploymentStrategy - - image - - port - - replicas - - resources - type: object - status: - description: CacheServerStatus defines the observed state of CacheServer - properties: - lastUpdated: - description: Last time the status was updated - format: date-time - type: string - status: - description: Current status of the cache server - type: string - type: object - type: object - served: true - storage: true - subresources: - status: {} + - additionalPrinterColumns: + - jsonPath: .status.status + name: Status + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: CacheServer is the Schema for the cacheservers API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: CacheServerSpec defines the desired state of CacheServer + properties: + deploymentStrategy: + default: RollingUpdate + description: Deployment strategy + enum: + - RollingUpdate + - Recreate + type: string + image: + description: Image configuration for the cache server + properties: + name: + type: string + pullPolicy: + type: string + pullSecretName: + type: string + registry: + type: string + required: + - name + - registry + type: object + port: + default: 8000 + description: Container port for the cache server + format: int32 + type: integer + replicas: + default: 1 + description: Number of replicas + format: int32 + type: integer + resources: + description: Resource requirements + properties: + cpu: + type: string + gpu: + type: string + memory: + type: string + type: object + required: + - deploymentStrategy + - image + - port + - replicas + - resources + type: object + status: + description: CacheServerStatus defines the observed state of CacheServer + properties: + lastUpdated: + description: Last time the status was updated + format: date-time + type: string + status: + description: Current status of the cache server + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} --- apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition @@ -135,241 +135,260 @@ spec: singular: loraadapter scope: Namespaced versions: - - additionalPrinterColumns: - - jsonPath: .status.phase - name: Phase - type: 
string - - jsonPath: .metadata.creationTimestamp - name: Age - type: date - name: v1alpha1 - schema: - openAPIV3Schema: - description: LoraAdapter is the Schema for the loraadapters API. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: LoraAdapterSpec defines the desired state of LoraAdapter. - properties: - adapterSource: - description: AdapterSource defines where to get the LoRA adapter from. - properties: - adapterName: - description: AdapterName is the name of the adapter to apply. - type: string - adapterPath: - description: 'AdapterPath is the path to the LoRA adapter weights. - For local sources: required, specifies the path to the adapter - For remote sources: optional, will be updated by the controller - with the download path' - type: string - credentialsSecretRef: - description: CredentialsSecretRef references a secret containing - storage credentials. - properties: - name: - description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid?' - type: string - type: object - x-kubernetes-map-type: atomic - maxAdapters: - description: MaxAdapters is the maximum number of adapters to - load. 
- format: int32 - type: integer - pattern: - description: Pattern is the pattern to use for the adapter name. - type: string - repository: - description: Repository is the repository to get the LoRA adapter - from. - type: string - type: - description: Type is the type of the adapter source. - enum: - - local - - s3 - - http - - huggingface - type: string - required: - - adapterName - - type - type: object - baseModel: - description: BaseModel is the name of the base model this adapter - is for. - type: string - loraAdapterDeploymentConfig: - description: DeploymentConfig defines how the adapter should be deployed - properties: - algorithm: - default: default - description: Algorithm specifies which placement algorithm to - use. - enum: - - default - - ordered - - equalized - type: string - replicas: - description: Replicas is the number of replicas that should load - this adapter. - format: int32 - minimum: 0 - type: integer - required: - - algorithm - type: object - vllmApiKey: - description: VLLMApiKey defines the configuration for vLLM API key - authentication - properties: - secretRef: - description: Reference to a secret containing the API key - properties: - secretKey: - description: Key in the secret containing the API key - type: string - secretName: - description: Name of the secret - type: string - required: - - secretKey - - secretName - type: object - value: - description: Direct API key value - type: string - type: object - required: - - adapterSource - - baseModel - type: object - status: - description: LoraAdapterStatus defines the observed state of LoraAdapter. - properties: - conditions: - description: Condition contains details for one aspect of the current - state of this API Resource. - items: - description: Condition contains details for one aspect of the current - state of this API Resource. 
+ - additionalPrinterColumns: + - jsonPath: .status.phase + name: Phase + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: LoraAdapter is the Schema for the loraadapters API. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: LoraAdapterSpec defines the desired state of LoraAdapter. + properties: + adapterSource: + description: AdapterSource defines where to get the LoRA adapter from. properties: - lastTransitionTime: - description: LastTransitionTime is the last time the condition - transitioned from one status to another. - format: date-time + adapterName: + description: AdapterName is the name of the adapter to apply. type: string - message: - description: Message is a human-readable message indicating - details about why the current state is set. - maxLength: 32768 + adapterPath: + description: + "AdapterPath is the path to the LoRA adapter weights. + For local sources: required, specifies the path to the adapter + For remote sources: optional, will be updated by the controller + with the download path" type: string - reason: - description: Reason is a brief reason for the condition's current - status. 
- maxLength: 1024 - minLength: 1 - pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + credentialsSecretRef: + description: + CredentialsSecretRef references a secret containing + storage credentials. + properties: + name: + description: + "Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Add other useful fields. apiVersion, kind, uid?" + type: string + type: object + x-kubernetes-map-type: atomic + maxAdapters: + description: + MaxAdapters is the maximum number of adapters to + load. + format: int32 + type: integer + pattern: + description: Pattern is the pattern to use for the adapter name. type: string - status: - description: Status is the status of the condition. - enum: - - "True" - - "False" - - Unknown + repository: + description: + Repository is the repository to get the LoRA adapter + from. type: string type: - description: type of condition in CamelCase. - maxLength: 316 - pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + description: Type is the type of the adapter source. + enum: + - local + - s3 + - http + - huggingface type: string required: - - lastTransitionTime - - message - - reason - - status - - type + - adapterName + - type type: object - type: array - loadedAdapters: - description: LoadedAdapters tracks the loading status of adapters - and their pod assignments. - items: - description: LoadedAdapter represents an adapter that has been loaded - into a pod + baseModel: + description: + BaseModel is the name of the base model this adapter + is for. 
+ type: string + loraAdapterDeploymentConfig: + description: DeploymentConfig defines how the adapter should be deployed properties: - loadTime: - description: LoadTime is when the adapter was loaded - format: date-time - type: string - name: - description: Name is the name of the adapter - type: string - path: - description: Path is the path where the adapter is loaded + algorithm: + default: default + description: + Algorithm specifies which placement algorithm to + use. + enum: + - default + - ordered + - equalized type: string - podAssignments: - description: PodAssignments represents the pods this adapter - has been assigned to + replicas: + description: + Replicas is the number of replicas that should load + this adapter. + format: int32 + minimum: 0 + type: integer + required: + - algorithm + type: object + vllmApiKey: + description: + VLLMApiKey defines the configuration for vLLM API key + authentication + properties: + secretRef: + description: Reference to a secret containing the API key properties: - namespace: - description: Namespace is the namespace of the pod + secretKey: + description: Key in the secret containing the API key type: string - podName: - description: Pod represents the pod information + secretName: + description: Name of the secret type: string required: - - namespace - - podName + - secretKey + - secretName type: object - status: - description: Status is the status of the adapter + value: + description: Direct API key value type: string - required: - - name - - path - - podAssignments - - status type: object - type: array - message: - description: Message provides additional information about the current - phase. - type: string - observedGeneration: - description: ObservedGeneration represents the .metadata.generation - that the condition was set based upon. - format: int64 - minimum: 0 - type: integer - phase: - description: Phase represents the current phase of the adapter deployment. 
- type: string - type: object - type: object - served: true - storage: true - subresources: - status: {} + required: + - adapterSource + - baseModel + type: object + status: + description: LoraAdapterStatus defines the observed state of LoraAdapter. + properties: + conditions: + description: + Condition contains details for one aspect of the current + state of this API Resource. + items: + description: + Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: + LastTransitionTime is the last time the condition + transitioned from one status to another. + format: date-time + type: string + message: + description: + Message is a human-readable message indicating + details about why the current state is set. + maxLength: 32768 + type: string + reason: + description: + Reason is a brief reason for the condition's current + status. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: Status is the status of the condition. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + loadedAdapters: + description: + LoadedAdapters tracks the loading status of adapters + and their pod assignments. 
+ items: + description: + LoadedAdapter represents an adapter that has been loaded + into a pod + properties: + loadTime: + description: LoadTime is when the adapter was loaded + format: date-time + type: string + name: + description: Name is the name of the adapter + type: string + path: + description: Path is the path where the adapter is loaded + type: string + podAssignments: + description: + PodAssignments represents the pods this adapter + has been assigned to + properties: + namespace: + description: Namespace is the namespace of the pod + type: string + podName: + description: Pod represents the pod information + type: string + required: + - namespace + - podName + type: object + status: + description: Status is the status of the adapter + type: string + required: + - name + - path + - podAssignments + - status + type: object + type: array + message: + description: + Message provides additional information about the current + phase. + type: string + observedGeneration: + description: + ObservedGeneration represents the .metadata.generation + that the condition was set based upon. + format: int64 + minimum: 0 + type: integer + phase: + description: Phase represents the current phase of the adapter deployment. + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} --- apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition @@ -386,247 +405,252 @@ spec: singular: vllmrouter scope: Namespaced versions: - - name: v1alpha1 - schema: - openAPIV3Schema: - description: VLLMRouter is the Schema for the vllmrouters API - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. 
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: VLLMRouterSpec defines the desired state of VLLMRouter - properties: - enableRouter: - default: true - description: EnableRouter determines if the router should be deployed - type: boolean - engineScrapeInterval: - description: EngineScrapeInterval for collecting engine statistics - format: int32 - type: integer - env: - description: Environment variables - items: - description: EnvVar represents an environment variable + - name: v1alpha1 + schema: + openAPIV3Schema: + description: VLLMRouter is the Schema for the vllmrouters API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: VLLMRouterSpec defines the desired state of VLLMRouter + properties: + enableRouter: + default: true + description: EnableRouter determines if the router should be deployed + type: boolean + engineScrapeInterval: + description: EngineScrapeInterval for collecting engine statistics + format: int32 + type: integer + env: + description: Environment variables + items: + description: EnvVar represents an environment variable + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + extraArgs: + description: ExtraArgs for additional router arguments + items: + type: string + type: array + image: + description: Image configuration properties: name: type: string - value: + pullPolicy: + type: string + pullSecretName: + type: string + registry: type: string required: - - name - - value + - name + - registry type: object - type: array - extraArgs: - description: ExtraArgs for additional router arguments - items: + k8sLabelSelector: + description: + K8sLabelSelector specifies the label selector for vLLM + runtime pods when using k8s service discovery type: string - type: array - image: - description: Image configuration - properties: - name: - type: string - pullPolicy: - type: string - pullSecretName: - type: string - registry: - type: string - required: - - name - - registry - type: object - k8sLabelSelector: - description: K8sLabelSelector specifies the label selector for vLLM - runtime pods when using k8s service discovery - type: string - nodeSelectorTerms: - description: NodeSelectorTerms for pod scheduling - items: - description: |- - A null or empty node selector term matches no objects. The requirements of - them are ANDed. - The TopologySelectorTerm type implements a subset of the NodeSelectorTerm. 
- properties: - matchExpressions: - description: A list of node selector requirements by node's - labels. - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that the selector applies to. - type: string - operator: - description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. - type: string - values: - description: |- - An array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. - items: + nodeSelectorTerms: + description: NodeSelectorTerms for pod scheduling + items: + description: |- + A null or empty node selector term matches no objects. The requirements of + them are ANDed. + The TopologySelectorTerm type implements a subset of the NodeSelectorTerm. + properties: + matchExpressions: + description: + A list of node selector requirements by node's + labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that the selector applies to. type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchFields: - description: A list of node selector requirements by node's - fields. - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that the selector applies to. 
- type: string - operator: - description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. - type: string - values: - description: |- - An array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. - items: + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: + A list of node selector requirements by node's + fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. 
If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + type: array + port: + default: 80 + description: ContainerPort for the router service + format: int32 + type: integer + replicas: + default: 1 + description: Replicas specifies the number of router replicas + format: int32 + type: integer + requestStatsWindow: + description: RequestStatsWindow for request statistics + format: int32 + type: integer + resources: + description: Resource requirements + properties: + cpu: + type: string + gpu: + type: string + memory: + type: string + type: object + routingLogic: + default: roundrobin + description: RoutingLogic specifies the routing strategy + enum: + - roundrobin + - session + type: string + serviceAccountName: + description: ServiceAccountName for the router pod + type: string + serviceDiscovery: + default: k8s + description: + ServiceDiscovery specifies the service discovery method + (k8s or static) + enum: + - k8s + - static + type: string + sessionKey: + default: "" + description: SessionKey for session-based routing + type: string + staticBackends: + description: + StaticBackends is required when using static service + discovery + type: string + staticModels: + description: StaticModels is required when using static service discovery + type: string + vllmApiKeyName: + type: string + vllmApiKeySecret: + description: VLLM API Key configuration + properties: + name: + default: "" + description: |- + Name of the referent. 
+ This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string type: object x-kubernetes-map-type: atomic - type: array - port: - default: 80 - description: ContainerPort for the router service - format: int32 - type: integer - replicas: - default: 1 - description: Replicas specifies the number of router replicas - format: int32 - type: integer - requestStatsWindow: - description: RequestStatsWindow for request statistics - format: int32 - type: integer - resources: - description: Resource requirements - properties: - cpu: - type: string - gpu: - type: string - memory: - type: string - type: object - routingLogic: - default: roundrobin - description: RoutingLogic specifies the routing strategy - enum: - - roundrobin - - session - type: string - serviceAccountName: - description: ServiceAccountName for the router pod - type: string - serviceDiscovery: - default: k8s - description: ServiceDiscovery specifies the service discovery method - (k8s or static) - enum: - - k8s - - static - type: string - sessionKey: - default: "" - description: SessionKey for session-based routing - type: string - staticBackends: - description: StaticBackends is required when using static service - discovery - type: string - staticModels: - description: StaticModels is required when using static service discovery - type: string - vllmApiKeyName: - type: string - vllmApiKeySecret: - description: VLLM API Key configuration - properties: - name: - default: "" - description: |- - Name of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. 
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - required: - - image - - resources - type: object - status: - description: VLLMRouterStatus defines the observed state of VLLMRouter - properties: - activeRuntimes: - description: Number of active runtimes - format: int32 - type: integer - lastUpdated: - description: Last updated timestamp - format: date-time - type: string - status: - description: Router status - type: string - type: object - type: object - served: true - storage: true - subresources: - status: {} + required: + - image + - resources + type: object + status: + description: VLLMRouterStatus defines the observed state of VLLMRouter + properties: + activeRuntimes: + description: Number of active runtimes + format: int32 + type: integer + lastUpdated: + description: Last updated timestamp + format: date-time + type: string + status: + description: Router status + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} --- apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition @@ -641,219 +665,222 @@ spec: listKind: VLLMRuntimeList plural: vllmruntimes shortNames: - - vr + - vr singular: vllmruntime scope: Namespaced versions: - - name: v1alpha1 - schema: - openAPIV3Schema: - description: VLLMRuntime is the Schema for the vllmruntimes API - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. 
- In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: VLLMRuntimeSpec defines the desired state of VLLMRuntime - properties: - deploymentConfig: - description: Deployment configuration - properties: - deploymentStrategy: - default: RollingUpdate - description: Deploy strategy - enum: - - RollingUpdate - - Recreate - type: string - image: - description: Image configuration - properties: - name: - type: string - pullPolicy: - type: string - pullSecretName: - type: string - registry: - type: string - required: - - name - - registry - type: object - replicas: - default: 1 - description: Replicas - format: int32 - type: integer - resources: - description: Resource requirements - properties: - cpu: - type: string - gpu: - type: string - memory: - type: string - type: object - required: - - image - - resources - type: object - lmCacheConfig: - description: LM Cache configuration - properties: - cpuOffloadingBufferSize: - default: 4Gi - description: CPUOffloadingBufferSize is the size of the CPU offloading - buffer - type: string - diskOffloadingBufferSize: - default: 8Gi - description: DiskOffloadingBufferSize is the size of the disk - offloading buffer - type: string - enabled: - default: false - description: Enabled enables LM Cache - type: boolean - remoteSerde: - description: RemoteSerde is the serialization format for the remote - cache - type: string - remoteUrl: - description: RemoteURL is the URL of the remote cache server - type: string - type: object - model: - description: Model configuration - properties: - dtype: - description: Data type - type: string - enableLoRA: - description: Enable LoRA - type: boolean - enableTool: - description: Enable tool - type: boolean - hfTokenName: - default: token - type: string - hfTokenSecret: - description: HuggingFace token secret - properties: - name: - default: "" - description: |- - Name 
of the referent. - This field is effectively required, but due to backwards compatibility is - allowed to be empty. Instances of this type with an empty value here are - almost certainly wrong. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - type: object - x-kubernetes-map-type: atomic - maxModelLen: - description: Maximum model length - format: int32 - type: integer - maxNumSeqs: - description: Maximum number of sequences - format: int32 - type: integer - modelURL: - description: Model URL - type: string - toolCallParser: - description: Tool call parser - type: string - required: - - modelURL - type: object - vllmConfig: - description: vLLM server configuration - properties: - enableChunkedPrefill: - description: Enable chunked prefill - type: boolean - enablePrefixCaching: - description: Enable prefix caching - type: boolean - env: - description: Environment variables - items: - description: EnvVar represents an environment variable + - name: v1alpha1 + schema: + openAPIV3Schema: + description: VLLMRuntime is the Schema for the vllmruntimes API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: VLLMRuntimeSpec defines the desired state of VLLMRuntime + properties: + deploymentConfig: + description: Deployment configuration + properties: + deploymentStrategy: + default: RollingUpdate + description: Deploy strategy + enum: + - RollingUpdate + - Recreate + type: string + image: + description: Image configuration properties: name: type: string - value: + pullPolicy: + type: string + pullSecretName: + type: string + registry: type: string required: - - name - - value + - name + - registry + type: object + replicas: + default: 1 + description: Replicas + format: int32 + type: integer + resources: + description: Resource requirements + properties: + cpu: + type: string + gpu: + type: string + memory: + type: string type: object - type: array - extraArgs: - description: Extra arguments for vllm serve - items: + required: + - image + - resources + type: object + lmCacheConfig: + description: LM Cache configuration + properties: + cpuOffloadingBufferSize: + default: 4Gi + description: + CPUOffloadingBufferSize is the size of the CPU offloading + buffer type: string - type: array - gpuMemoryUtilization: - description: GPU memory utilization - type: string - maxLoras: - description: Maximum number of LoRAs - format: int32 - type: integer - port: - default: 8000 - description: Port for vLLM server - format: int32 - type: integer - tensorParallelSize: - description: Tensor parallel size - format: int32 - type: integer - v1: - description: Use V1 API - type: boolean - type: object - required: - - deploymentConfig - - model - - vllmConfig - type: object - status: - description: VLLMRuntimeStatus defines the observed state of VLLMRuntime - properties: - lastUpdated: - description: Last updated timestamp - format: date-time - type: string - modelStatus: - description: Model status - type: string - 
type: object - type: object - served: true - storage: true - subresources: - status: {} + diskOffloadingBufferSize: + default: 8Gi + description: + DiskOffloadingBufferSize is the size of the disk + offloading buffer + type: string + enabled: + default: false + description: Enabled enables LM Cache + type: boolean + remoteSerde: + description: + RemoteSerde is the serialization format for the remote + cache + type: string + remoteUrl: + description: RemoteURL is the URL of the remote cache server + type: string + type: object + model: + description: Model configuration + properties: + dtype: + description: Data type + type: string + enableLoRA: + description: Enable LoRA + type: boolean + enableTool: + description: Enable tool + type: boolean + hfTokenName: + default: token + type: string + hfTokenSecret: + description: HuggingFace token secret + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + maxModelLen: + description: Maximum model length + format: int32 + type: integer + maxNumSeqs: + description: Maximum number of sequences + format: int32 + type: integer + modelURL: + description: Model URL + type: string + toolCallParser: + description: Tool call parser + type: string + required: + - modelURL + type: object + vllmConfig: + description: vLLM server configuration + properties: + enableChunkedPrefill: + description: Enable chunked prefill + type: boolean + enablePrefixCaching: + description: Enable prefix caching + type: boolean + env: + description: Environment variables + items: + description: EnvVar represents an environment variable + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + extraArgs: + description: Extra arguments for vllm serve + items: + type: string + type: array + gpuMemoryUtilization: + description: GPU memory utilization + type: string + maxLoras: + description: Maximum number of LoRAs + format: int32 + type: integer + port: + default: 8000 + description: Port for vLLM server + format: int32 + type: integer + tensorParallelSize: + description: Tensor parallel size + format: int32 + type: integer + v1: + description: Use V1 API + type: boolean + type: object + required: + - deploymentConfig + - model + - vllmConfig + type: object + status: + description: VLLMRuntimeStatus defines the observed state of VLLMRuntime + properties: + lastUpdated: + description: Last updated timestamp + format: date-time + type: string + modelStatus: + description: Model status + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} --- apiVersion: v1 kind: ServiceAccount @@ -873,37 +900,37 @@ metadata: name: production-stack-leader-election-role namespace: production-stack-system 
rules: -- apiGroups: - - "" - resources: - - configmaps - verbs: - - get - - list - - watch - - create - - update - - patch - - delete -- apiGroups: - - coordination.k8s.io - resources: - - leases - verbs: - - get - - list - - watch - - create - - update - - patch - - delete -- apiGroups: - - "" - resources: - - events - verbs: - - create - - patch + - apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - list + - watch + - create + - update + - patch + - delete + - apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - get + - list + - watch + - create + - update + - patch + - delete + - apiGroups: + - "" + resources: + - events + verbs: + - create + - patch --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole @@ -913,18 +940,18 @@ metadata: app.kubernetes.io/name: production-stack name: production-stack-cacheserver-admin-role rules: -- apiGroups: - - production-stack.vllm.ai - resources: - - cacheservers - verbs: - - '*' -- apiGroups: - - production-stack.vllm.ai - resources: - - cacheservers/status - verbs: - - get + - apiGroups: + - production-stack.vllm.ai + resources: + - cacheservers + verbs: + - "*" + - apiGroups: + - production-stack.vllm.ai + resources: + - cacheservers/status + verbs: + - get --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole @@ -934,24 +961,24 @@ metadata: app.kubernetes.io/name: production-stack name: production-stack-cacheserver-editor-role rules: -- apiGroups: - - production-stack.vllm.ai - resources: - - cacheservers - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - production-stack.vllm.ai - resources: - - cacheservers/status - verbs: - - get + - apiGroups: + - production-stack.vllm.ai + resources: + - cacheservers + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - production-stack.vllm.ai + resources: + - cacheservers/status + verbs: + - get --- apiVersion: rbac.authorization.k8s.io/v1 
kind: ClusterRole @@ -961,20 +988,20 @@ metadata: app.kubernetes.io/name: production-stack name: production-stack-cacheserver-viewer-role rules: -- apiGroups: - - production-stack.vllm.ai - resources: - - cacheservers - verbs: - - get - - list - - watch -- apiGroups: - - production-stack.vllm.ai - resources: - - cacheservers/status - verbs: - - get + - apiGroups: + - production-stack.vllm.ai + resources: + - cacheservers + verbs: + - get + - list + - watch + - apiGroups: + - production-stack.vllm.ai + resources: + - cacheservers/status + verbs: + - get --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole @@ -984,18 +1011,18 @@ metadata: app.kubernetes.io/name: production-stack name: production-stack-loraadapter-admin-role rules: -- apiGroups: - - production-stack.vllm.ai - resources: - - loraadapters - verbs: - - '*' -- apiGroups: - - production-stack.vllm.ai - resources: - - loraadapters/status - verbs: - - get + - apiGroups: + - production-stack.vllm.ai + resources: + - loraadapters + verbs: + - "*" + - apiGroups: + - production-stack.vllm.ai + resources: + - loraadapters/status + verbs: + - get --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole @@ -1005,24 +1032,24 @@ metadata: app.kubernetes.io/name: production-stack name: production-stack-loraadapter-editor-role rules: -- apiGroups: - - production-stack.vllm.ai - resources: - - loraadapters - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - production-stack.vllm.ai - resources: - - loraadapters/status - verbs: - - get + - apiGroups: + - production-stack.vllm.ai + resources: + - loraadapters + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - production-stack.vllm.ai + resources: + - loraadapters/status + verbs: + - get --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole @@ -1032,137 +1059,137 @@ metadata: app.kubernetes.io/name: production-stack name: 
production-stack-loraadapter-viewer-role rules: -- apiGroups: - - production-stack.vllm.ai - resources: - - loraadapters - verbs: - - get - - list - - watch -- apiGroups: - - production-stack.vllm.ai - resources: - - loraadapters/status - verbs: - - get + - apiGroups: + - production-stack.vllm.ai + resources: + - loraadapters + verbs: + - get + - list + - watch + - apiGroups: + - production-stack.vllm.ai + resources: + - loraadapters/status + verbs: + - get --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: name: production-stack-manager-role rules: -- apiGroups: - - "" - resources: - - configmaps - - secrets - - serviceaccounts - - services - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - "" - resources: - - pods - verbs: - - get - - list - - watch -- apiGroups: - - apps - resources: - - deployments - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - production-stack.vllm.ai - resources: - - cacheservers - - loraadapters - - vllmrouters - - vllmruntimes - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - production-stack.vllm.ai - resources: - - cacheservers/finalizers - - loraadapters/finalizers - - vllmrouters/finalizers - - vllmruntimes/finalizers - verbs: - - update -- apiGroups: - - production-stack.vllm.ai - resources: - - cacheservers/status - - loraadapters/status - - vllmrouters/status - - vllmruntimes/status - verbs: - - get - - patch - - update -- apiGroups: - - rbac.authorization.k8s.io - resources: - - rolebindings - - roles - verbs: - - create - - delete - - get - - list - - patch - - update - - watch + - apiGroups: + - "" + resources: + - configmaps + - secrets + - serviceaccounts + - services + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch + - apiGroups: + - apps + resources: + - 
deployments + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - production-stack.vllm.ai + resources: + - cacheservers + - loraadapters + - vllmrouters + - vllmruntimes + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - production-stack.vllm.ai + resources: + - cacheservers/finalizers + - loraadapters/finalizers + - vllmrouters/finalizers + - vllmruntimes/finalizers + verbs: + - update + - apiGroups: + - production-stack.vllm.ai + resources: + - cacheservers/status + - loraadapters/status + - vllmrouters/status + - vllmruntimes/status + verbs: + - get + - patch + - update + - apiGroups: + - rbac.authorization.k8s.io + resources: + - rolebindings + - roles + verbs: + - create + - delete + - get + - list + - patch + - update + - watch --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: name: production-stack-metrics-auth-role rules: -- apiGroups: - - authentication.k8s.io - resources: - - tokenreviews - verbs: - - create -- apiGroups: - - authorization.k8s.io - resources: - - subjectaccessreviews - verbs: - - create + - apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create + - apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: name: production-stack-metrics-reader rules: -- nonResourceURLs: - - /metrics - verbs: - - get + - nonResourceURLs: + - /metrics + verbs: + - get --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole @@ -1172,18 +1199,18 @@ metadata: app.kubernetes.io/name: production-stack name: production-stack-vllmrouter-admin-role rules: -- apiGroups: - - production-stack.vllm.ai - resources: - - vllmrouters - verbs: - - '*' -- apiGroups: - - production-stack.vllm.ai - resources: - - vllmrouters/status - verbs: - - get + - apiGroups: + - production-stack.vllm.ai + resources: + - vllmrouters 
+ verbs: + - "*" + - apiGroups: + - production-stack.vllm.ai + resources: + - vllmrouters/status + verbs: + - get --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole @@ -1193,24 +1220,24 @@ metadata: app.kubernetes.io/name: production-stack name: production-stack-vllmrouter-editor-role rules: -- apiGroups: - - production-stack.vllm.ai - resources: - - vllmrouters - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - production-stack.vllm.ai - resources: - - vllmrouters/status - verbs: - - get + - apiGroups: + - production-stack.vllm.ai + resources: + - vllmrouters + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - production-stack.vllm.ai + resources: + - vllmrouters/status + verbs: + - get --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole @@ -1220,20 +1247,20 @@ metadata: app.kubernetes.io/name: production-stack name: production-stack-vllmrouter-viewer-role rules: -- apiGroups: - - production-stack.vllm.ai - resources: - - vllmrouters - verbs: - - get - - list - - watch -- apiGroups: - - production-stack.vllm.ai - resources: - - vllmrouters/status - verbs: - - get + - apiGroups: + - production-stack.vllm.ai + resources: + - vllmrouters + verbs: + - get + - list + - watch + - apiGroups: + - production-stack.vllm.ai + resources: + - vllmrouters/status + verbs: + - get --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole @@ -1243,18 +1270,18 @@ metadata: app.kubernetes.io/name: production-stack name: production-stack-vllmruntime-admin-role rules: -- apiGroups: - - production-stack.vllm.ai - resources: - - vllmruntimes - verbs: - - '*' -- apiGroups: - - production-stack.vllm.ai - resources: - - vllmruntimes/status - verbs: - - get + - apiGroups: + - production-stack.vllm.ai + resources: + - vllmruntimes + verbs: + - "*" + - apiGroups: + - production-stack.vllm.ai + resources: + - vllmruntimes/status + verbs: + - get --- apiVersion: 
rbac.authorization.k8s.io/v1 kind: ClusterRole @@ -1264,24 +1291,24 @@ metadata: app.kubernetes.io/name: production-stack name: production-stack-vllmruntime-editor-role rules: -- apiGroups: - - production-stack.vllm.ai - resources: - - vllmruntimes - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - production-stack.vllm.ai - resources: - - vllmruntimes/status - verbs: - - get + - apiGroups: + - production-stack.vllm.ai + resources: + - vllmruntimes + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - production-stack.vllm.ai + resources: + - vllmruntimes/status + verbs: + - get --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole @@ -1291,20 +1318,20 @@ metadata: app.kubernetes.io/name: production-stack name: production-stack-vllmruntime-viewer-role rules: -- apiGroups: - - production-stack.vllm.ai - resources: - - vllmruntimes - verbs: - - get - - list - - watch -- apiGroups: - - production-stack.vllm.ai - resources: - - vllmruntimes/status - verbs: - - get + - apiGroups: + - production-stack.vllm.ai + resources: + - vllmruntimes + verbs: + - get + - list + - watch + - apiGroups: + - production-stack.vllm.ai + resources: + - vllmruntimes/status + verbs: + - get --- apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding @@ -1319,9 +1346,9 @@ roleRef: kind: Role name: production-stack-leader-election-role subjects: -- kind: ServiceAccount - name: production-stack-controller-manager - namespace: production-stack-system + - kind: ServiceAccount + name: production-stack-controller-manager + namespace: production-stack-system --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding @@ -1335,9 +1362,9 @@ roleRef: kind: ClusterRole name: production-stack-manager-role subjects: -- kind: ServiceAccount - name: production-stack-controller-manager - namespace: production-stack-system + - kind: ServiceAccount + name: production-stack-controller-manager + 
namespace: production-stack-system --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding @@ -1348,9 +1375,9 @@ roleRef: kind: ClusterRole name: production-stack-metrics-auth-role subjects: -- kind: ServiceAccount - name: production-stack-controller-manager - namespace: production-stack-system + - kind: ServiceAccount + name: production-stack-controller-manager + namespace: production-stack-system --- apiVersion: v1 kind: Service @@ -1363,10 +1390,10 @@ metadata: namespace: production-stack-system spec: ports: - - name: https - port: 8443 - protocol: TCP - targetPort: 8443 + - name: https + port: 8443 + protocol: TCP + targetPort: 8443 selector: app.kubernetes.io/name: production-stack control-plane: controller-manager @@ -1397,41 +1424,41 @@ spec: app.kubernetes.io/name: production-stack spec: containers: - - args: - - --metrics-bind-address=:8443 - - --leader-elect - - --health-probe-bind-address=:8081 - command: - - /manager - image: lmcache/production-stack-operator:latest - imagePullPolicy: Always - livenessProbe: - httpGet: - path: /healthz - port: 8081 - initialDelaySeconds: 15 - periodSeconds: 20 - name: manager - ports: [] - readinessProbe: - httpGet: - path: /readyz - port: 8081 - initialDelaySeconds: 5 - periodSeconds: 10 - resources: - limits: - cpu: 500m - memory: 128Mi - requests: - cpu: 10m - memory: 64Mi - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - volumeMounts: [] + - args: + - --metrics-bind-address=:8443 + - --leader-elect + - --health-probe-bind-address=:8081 + command: + - /manager + image: lmcache/production-stack-operator:latest + imagePullPolicy: Always + livenessProbe: + httpGet: + path: /healthz + port: 8081 + initialDelaySeconds: 15 + periodSeconds: 20 + name: manager + ports: [] + readinessProbe: + httpGet: + path: /readyz + port: 8081 + initialDelaySeconds: 5 + periodSeconds: 10 + resources: + limits: + cpu: 500m + memory: 128Mi + requests: + cpu: 10m + memory: 64Mi + 
securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + volumeMounts: [] securityContext: runAsNonRoot: true seccompProfile: diff --git a/operator/config/manager/kustomization.yaml b/operator/config/manager/kustomization.yaml index cc9f03e24..ac10fc9f6 100644 --- a/operator/config/manager/kustomization.yaml +++ b/operator/config/manager/kustomization.yaml @@ -1,9 +1,3 @@ resources: -- namespace.yaml -- deployment.yaml -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -images: -- name: controller - newName: 1nfinity/production-stack-controller - newTag: latest + - namespace.yaml + - deployment.yaml