From 2a6cf602350b13891490dc8597d9d3b1726d4819 Mon Sep 17 00:00:00 2001
From: royyhuang <roy.y.huang@gmail.com>
Date: Thu, 8 May 2025 05:10:33 +0000
Subject: [PATCH 01/14] add CRD support for production stack

Signed-off-by: royyhuang <roy.y.huang@gmail.com>
---
 .dockerignore                                 |   3 +
 .gitignore                                    |  31 +
 Dockerfile                                    |  33 ++
 Makefile                                      | 225 ++++++++
 PROJECT                                       |  29 +
 api/v1alpha1/groupversion_info.go             |  36 ++
 api/v1alpha1/vllmrouter_types.go              | 128 +++++
 api/v1alpha1/vllmruntime_types.go             | 184 ++++++
 api/v1alpha1/zz_generated.deepcopy.go         | 316 +++++++++++
 cmd/main.go                                   | 252 +++++++++
 ...production-stack.vllm.ai_staticroutes.yaml | 218 +++++++
 .../bases/serving.vllm.ai_vllmrouters.yaml    | 251 ++++++++
 .../bases/serving.vllm.ai_vllmruntimes.yaml   | 216 +++++++
 config/crd/kustomization.yaml                 |  17 +
 config/crd/kustomizeconfig.yaml               |  19 +
 .../default/cert_metrics_manager_patch.yaml   |  30 +
 config/default/kustomization.yaml             | 234 ++++++++
 config/default/manager_metrics_patch.yaml     |   4 +
 config/default/metrics_service.yaml           |  18 +
 config/manager/deployment.yaml                |  92 +++
 config/manager/kustomization.yaml             |   9 +
 config/manager/namespace.yaml                 |   8 +
 .../network-policy/allow-metrics-traffic.yaml |  27 +
 config/network-policy/kustomization.yaml      |   2 +
 config/prometheus/kustomization.yaml          |  11 +
 config/prometheus/monitor.yaml                |  27 +
 config/prometheus/monitor_tls_patch.yaml      |  19 +
 config/rbac/kustomization.yaml                |  30 +
 config/rbac/leader_election_role.yaml         |  40 ++
 config/rbac/leader_election_role_binding.yaml |  15 +
 config/rbac/metrics_auth_role.yaml            |  17 +
 config/rbac/metrics_auth_role_binding.yaml    |  12 +
 config/rbac/metrics_reader_role.yaml          |   9 +
 config/rbac/pod_viewer_role.yaml              |  17 +
 config/rbac/role.yaml                         |  95 ++++
 config/rbac/role_binding.yaml                 |  15 +
 config/rbac/service_account.yaml              |   8 +
 config/rbac/vllmrouter_admin_role.yaml        |  27 +
 config/rbac/vllmrouter_editor_role.yaml       |  33 ++
 config/rbac/vllmrouter_role_binding.yaml      |  16 +
 config/rbac/vllmrouter_service_account.yaml   |   8 +
 config/rbac/vllmrouter_viewer_role.yaml       |  29 +
 config/rbac/vllmruntime_admin_role.yaml       |  27 +
 config/rbac/vllmruntime_editor_role.yaml      |  33 ++
 config/rbac/vllmruntime_viewer_role.yaml      |  29 +
 config/samples/kustomization.yaml             |   8 +
 config/samples/serving_v1alpha1_router.yaml   |  57 ++
 .../samples/serving_v1alpha1_vllmruntime.yaml |  63 +++
 go.mod                                        | 100 ++++
 go.sum                                        | 254 +++++++++
 hack/boilerplate.go.txt                       |  15 +
 internal/controller/suite_test.go             |  88 +++
 internal/controller/vllmrouter_controller.go  | 383 +++++++++++++
 .../controller/vllmrouter_controller_test.go  |  84 +++
 internal/controller/vllmruntime_controller.go | 534 ++++++++++++++++++
 .../controller/vllmruntime_controller_test.go |  84 +++
 56 files changed, 4569 insertions(+)
 create mode 100644 .dockerignore
 create mode 100644 Dockerfile
 create mode 100644 Makefile
 create mode 100644 PROJECT
 create mode 100644 api/v1alpha1/groupversion_info.go
 create mode 100644 api/v1alpha1/vllmrouter_types.go
 create mode 100644 api/v1alpha1/vllmruntime_types.go
 create mode 100644 api/v1alpha1/zz_generated.deepcopy.go
 create mode 100644 cmd/main.go
 create mode 100644 config/crd/bases/production-stack.vllm.ai_staticroutes.yaml
 create mode 100644 config/crd/bases/serving.vllm.ai_vllmrouters.yaml
 create mode 100644 config/crd/bases/serving.vllm.ai_vllmruntimes.yaml
 create mode 100644 config/crd/kustomization.yaml
 create mode 100644 config/crd/kustomizeconfig.yaml
 create mode 100644 config/default/cert_metrics_manager_patch.yaml
 create mode 100644 config/default/kustomization.yaml
 create mode 100644 config/default/manager_metrics_patch.yaml
 create mode 100644 config/default/metrics_service.yaml
 create mode 100644 config/manager/deployment.yaml
 create mode 100644 config/manager/kustomization.yaml
 create mode 100644 config/manager/namespace.yaml
 create mode 100644 config/network-policy/allow-metrics-traffic.yaml
 create mode 100644 config/network-policy/kustomization.yaml
 create mode 100644 config/prometheus/kustomization.yaml
 create mode 100644 config/prometheus/monitor.yaml
 create mode 100644 config/prometheus/monitor_tls_patch.yaml
 create mode 100644 config/rbac/kustomization.yaml
 create mode 100644 config/rbac/leader_election_role.yaml
 create mode 100644 config/rbac/leader_election_role_binding.yaml
 create mode 100644 config/rbac/metrics_auth_role.yaml
 create mode 100644 config/rbac/metrics_auth_role_binding.yaml
 create mode 100644 config/rbac/metrics_reader_role.yaml
 create mode 100644 config/rbac/pod_viewer_role.yaml
 create mode 100644 config/rbac/role.yaml
 create mode 100644 config/rbac/role_binding.yaml
 create mode 100644 config/rbac/service_account.yaml
 create mode 100644 config/rbac/vllmrouter_admin_role.yaml
 create mode 100644 config/rbac/vllmrouter_editor_role.yaml
 create mode 100644 config/rbac/vllmrouter_role_binding.yaml
 create mode 100644 config/rbac/vllmrouter_service_account.yaml
 create mode 100644 config/rbac/vllmrouter_viewer_role.yaml
 create mode 100644 config/rbac/vllmruntime_admin_role.yaml
 create mode 100644 config/rbac/vllmruntime_editor_role.yaml
 create mode 100644 config/rbac/vllmruntime_viewer_role.yaml
 create mode 100644 config/samples/kustomization.yaml
 create mode 100644 config/samples/serving_v1alpha1_router.yaml
 create mode 100644 config/samples/serving_v1alpha1_vllmruntime.yaml
 create mode 100644 go.mod
 create mode 100644 go.sum
 create mode 100644 hack/boilerplate.go.txt
 create mode 100644 internal/controller/suite_test.go
 create mode 100644 internal/controller/vllmrouter_controller.go
 create mode 100644 internal/controller/vllmrouter_controller_test.go
 create mode 100644 internal/controller/vllmruntime_controller.go
 create mode 100644 internal/controller/vllmruntime_controller_test.go

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 000000000..a3aab7af7
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,3 @@
+# More info: https://docs.docker.com/engine/reference/builder/#dockerignore-file
+# Ignore build and test binaries.
+bin/
diff --git a/.gitignore b/.gitignore
index 4bc1bdf22..1341491df 100644
--- a/.gitignore
+++ b/.gitignore
@@ -147,3 +147,34 @@ terraform.rc
 
 # google cloud platform
 credentials.json
+
+.idea/
+.vscode/
+WORKSPACE
+.DS_Store
+# don't check in the build output of the book
+docs/book/book/
+
+# ignore auto-generated dir by `mdbook serve`
+docs/book/src/docs
+
+# Editor temp files
+*~
+\#*#
+*.swp
+
+# Skip bazel dirs
+/bazel-*
+
+# skip bin dirs
+**/bin
+**/testbin
+
+# skip .out files (coverage tests)
+*.out
+
+# skip testdata go.sum, since it may have
+# different result depending on go version
+/testdata/**/go.sum
+/docs/book/src/simple-external-plugin-tutorial/testdata/sampleexternalplugin/v1/bin
+/testdata/**legacy**
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 000000000..4ea148aed
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,33 @@
+# Build the manager binary
+FROM docker.io/golang:1.24 AS builder
+ARG TARGETOS
+ARG TARGETARCH
+
+WORKDIR /workspace
+# Copy the Go Modules manifests
+COPY go.mod go.mod
+COPY go.sum go.sum
+# cache deps before building and copying source so that we don't need to re-download as much
+# and so that source changes don't invalidate our downloaded layer
+RUN go mod download
+
+# Copy the go source
+COPY cmd/main.go cmd/main.go
+COPY api/ api/
+COPY internal/ internal/
+
+# Build
+# GOARCH has no default value, so the binary is built for the platform of the host where the command
+# was called. For example, if we call make docker-build in a local env running on Apple Silicon (M1),
+# the docker BUILDPLATFORM arg will be linux/arm64, whereas on an x86 Mac it will be linux/amd64. Therefore,
+# by leaving it empty we can ensure that the container and the binary shipped in it have the same platform.
+RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o manager cmd/main.go
+
+# Use distroless as minimal base image to package the manager binary
+# Refer to https://github.com/GoogleContainerTools/distroless for more details
+FROM gcr.io/distroless/static:nonroot
+WORKDIR /
+COPY --from=builder /workspace/manager .
+USER 65532:65532
+
+ENTRYPOINT ["/manager"]
diff --git a/Makefile b/Makefile
new file mode 100644
index 000000000..a34e95dd6
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,225 @@
+# Image URL to use all building/pushing image targets
+IMG ?= controller:latest
+
+# Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set)
+ifeq (,$(shell go env GOBIN))
+GOBIN=$(shell go env GOPATH)/bin
+else
+GOBIN=$(shell go env GOBIN)
+endif
+
+# CONTAINER_TOOL defines the container tool to be used for building images.
+# Be aware that the target commands are only tested with Docker which is
+# scaffolded by default. However, you might want to replace it to use other
+# tools. (i.e. podman)
+CONTAINER_TOOL ?= docker
+
+# Setting SHELL to bash allows bash commands to be executed by recipes.
+# Options are set to exit when a recipe line exits non-zero or a piped command fails.
+SHELL = /usr/bin/env bash -o pipefail
+.SHELLFLAGS = -ec
+
+.PHONY: all
+all: build
+
+##@ General
+
+# The help target prints out all targets with their descriptions organized
+# beneath their categories. The categories are represented by '##@' and the
+# target descriptions by '##'. The awk command is responsible for reading the
+# entire set of makefiles included in this invocation, looking for lines of the
+# file as xyz: ## something, and then pretty-format the target and help. Then,
+# if there's a line with ##@ something, that gets pretty-printed as a category.
+# More info on the usage of ANSI control characters for terminal formatting:
+# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters
+# More info on the awk command:
+# http://linuxcommand.org/lc3_adv_awk.php
+
+.PHONY: help
+help: ## Display this help.
+	@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n  make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf "  \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
+
+##@ Development
+
+.PHONY: manifests
+manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.
+	$(CONTROLLER_GEN) rbac:roleName=manager-role crd webhook paths="./..." output:crd:artifacts:config=config/crd/bases
+
+.PHONY: generate
+generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
+	$(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths="./..."
+
+.PHONY: fmt
+fmt: ## Run go fmt against code.
+	go fmt ./...
+
+.PHONY: vet
+vet: ## Run go vet against code.
+	go vet ./...
+
+.PHONY: test
+test: manifests generate fmt vet setup-envtest ## Run tests.
+	KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test $$(go list ./... | grep -v /e2e) -coverprofile cover.out
+
+# TODO(user): To use a different vendor for e2e tests, modify the setup under 'test/e2e'.
+# The default setup assumes Kind is pre-installed and builds/loads the Manager Docker image locally.
+# CertManager is installed by default; skip with:
+# - CERT_MANAGER_INSTALL_SKIP=true
+.PHONY: test-e2e
+test-e2e: manifests generate fmt vet ## Run the e2e tests. Expected an isolated environment using Kind.
+	@command -v $(KIND) >/dev/null 2>&1 || { \
+		echo "Kind is not installed. Please install Kind manually."; \
+		exit 1; \
+	}
+	@$(KIND) get clusters | grep -q 'kind' || { \
+		echo "No Kind cluster is running. Please start a Kind cluster before running the e2e tests."; \
+		exit 1; \
+	}
+	go test ./test/e2e/ -v -ginkgo.v
+
+.PHONY: lint
+lint: golangci-lint ## Run golangci-lint linter
+	$(GOLANGCI_LINT) run
+
+.PHONY: lint-fix
+lint-fix: golangci-lint ## Run golangci-lint linter and perform fixes
+	$(GOLANGCI_LINT) run --fix
+
+.PHONY: lint-config
+lint-config: golangci-lint ## Verify golangci-lint linter configuration
+	$(GOLANGCI_LINT) config verify
+
+##@ Build
+
+.PHONY: build
+build: manifests generate fmt vet ## Build manager binary.
+	go build -o bin/manager cmd/main.go
+
+.PHONY: run
+run: manifests generate fmt vet ## Run a controller from your host.
+	go run ./cmd/main.go
+
+# If you wish to build the manager image targeting other platforms you can use the --platform flag.
+# (i.e. docker build --platform linux/arm64). However, you must enable docker buildKit for it.
+# More info: https://docs.docker.com/develop/develop-images/build_enhancements/
+.PHONY: docker-build
+docker-build: ## Build docker image with the manager.
+	$(CONTAINER_TOOL) build -t ${IMG} .
+
+.PHONY: docker-push
+docker-push: ## Push docker image with the manager.
+	$(CONTAINER_TOOL) push ${IMG}
+
+# PLATFORMS defines the target platforms the manager image is built for, to provide support for multiple
+# architectures. (i.e. make docker-buildx IMG=myregistry/myoperator:0.0.1). To use this option you need to:
+# - be able to use docker buildx. More info: https://docs.docker.com/build/buildx/
+# - have enabled BuildKit. More info: https://docs.docker.com/develop/develop-images/build_enhancements/
+# - be able to push the image to your registry (i.e. if you do not set a valid value via IMG=<myregistry/image:<tag>> then the export will fail)
+# To adequately provide solutions that are compatible with multiple platforms, you should consider using this option.
+PLATFORMS ?= linux/arm64,linux/amd64,linux/s390x,linux/ppc64le
+.PHONY: docker-buildx
+docker-buildx: ## Build and push docker image for the manager for cross-platform support
+	# copy existing Dockerfile and insert --platform=${BUILDPLATFORM} into Dockerfile.cross, and preserve the original Dockerfile
+	sed -e '1 s/\(^FROM\)/FROM --platform=\$$\{BUILDPLATFORM\}/; t' -e ' 1,// s//FROM --platform=\$$\{BUILDPLATFORM\}/' Dockerfile > Dockerfile.cross
+	- $(CONTAINER_TOOL) buildx create --name production-stack-builder
+	$(CONTAINER_TOOL) buildx use production-stack-builder
+	- $(CONTAINER_TOOL) buildx build --push --platform=$(PLATFORMS) --tag ${IMG} -f Dockerfile.cross .
+	- $(CONTAINER_TOOL) buildx rm production-stack-builder
+	rm Dockerfile.cross
+
+.PHONY: build-installer
+build-installer: manifests generate kustomize ## Generate a consolidated YAML with CRDs and deployment.
+	mkdir -p dist
+	cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG}
+	$(KUSTOMIZE) build config/default > dist/install.yaml
+
+##@ Deployment
+
+ifndef ignore-not-found
+  ignore-not-found = false
+endif
+
+.PHONY: install
+install: manifests kustomize ## Install CRDs into the K8s cluster specified in ~/.kube/config.
+	$(KUSTOMIZE) build config/crd | $(KUBECTL) apply -f -
+
+.PHONY: uninstall
+uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.
+	$(KUSTOMIZE) build config/crd | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f -
+
+.PHONY: deploy
+deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config.
+	cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG}
+	$(KUSTOMIZE) build config/default | $(KUBECTL) apply -f -
+
+.PHONY: undeploy
+undeploy: kustomize ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.
+	$(KUSTOMIZE) build config/default | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f -
+
+##@ Dependencies
+
+## Location to install dependencies to
+LOCALBIN ?= $(shell pwd)/bin
+$(LOCALBIN):
+	mkdir -p $(LOCALBIN)
+
+## Tool Binaries
+KUBECTL ?= kubectl
+KIND ?= kind
+KUSTOMIZE ?= $(LOCALBIN)/kustomize
+CONTROLLER_GEN ?= $(LOCALBIN)/controller-gen
+ENVTEST ?= $(LOCALBIN)/setup-envtest
+GOLANGCI_LINT = $(LOCALBIN)/golangci-lint
+
+## Tool Versions
+KUSTOMIZE_VERSION ?= v5.6.0
+CONTROLLER_TOOLS_VERSION ?= v0.17.2
+#ENVTEST_VERSION is the version of controller-runtime release branch to fetch the envtest setup script (i.e. release-0.20)
+ENVTEST_VERSION ?= $(shell go list -m -f "{{ .Version }}" sigs.k8s.io/controller-runtime | awk -F'[v.]' '{printf "release-%d.%d", $$2, $$3}')
+#ENVTEST_K8S_VERSION is the version of Kubernetes to use for setting up ENVTEST binaries (i.e. 1.31)
+ENVTEST_K8S_VERSION ?= $(shell go list -m -f "{{ .Version }}" k8s.io/api | awk -F'[v.]' '{printf "1.%d", $$3}')
+GOLANGCI_LINT_VERSION ?= v1.63.4
+
+.PHONY: kustomize
+kustomize: $(KUSTOMIZE) ## Download kustomize locally if necessary.
+$(KUSTOMIZE): $(LOCALBIN)
+	$(call go-install-tool,$(KUSTOMIZE),sigs.k8s.io/kustomize/kustomize/v5,$(KUSTOMIZE_VERSION))
+
+.PHONY: controller-gen
+controller-gen: $(CONTROLLER_GEN) ## Download controller-gen locally if necessary.
+$(CONTROLLER_GEN): $(LOCALBIN)
+	$(call go-install-tool,$(CONTROLLER_GEN),sigs.k8s.io/controller-tools/cmd/controller-gen,$(CONTROLLER_TOOLS_VERSION))
+
+.PHONY: setup-envtest
+setup-envtest: envtest ## Download the binaries required for ENVTEST in the local bin directory.
+	@echo "Setting up envtest binaries for Kubernetes version $(ENVTEST_K8S_VERSION)..."
+	@$(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path || { \
+		echo "Error: Failed to set up envtest binaries for version $(ENVTEST_K8S_VERSION)."; \
+		exit 1; \
+	}
+
+.PHONY: envtest
+envtest: $(ENVTEST) ## Download setup-envtest locally if necessary.
+$(ENVTEST): $(LOCALBIN)
+	$(call go-install-tool,$(ENVTEST),sigs.k8s.io/controller-runtime/tools/setup-envtest,$(ENVTEST_VERSION))
+
+.PHONY: golangci-lint
+golangci-lint: $(GOLANGCI_LINT) ## Download golangci-lint locally if necessary.
+$(GOLANGCI_LINT): $(LOCALBIN)
+	$(call go-install-tool,$(GOLANGCI_LINT),github.com/golangci/golangci-lint/cmd/golangci-lint,$(GOLANGCI_LINT_VERSION))
+
+# go-install-tool will 'go install' any package with custom target and name of binary, if it doesn't exist
+# $1 - target path with name of binary
+# $2 - package url which can be installed
+# $3 - specific version of package
+define go-install-tool
+@[ -f "$(1)-$(3)" ] || { \
+set -e; \
+package=$(2)@$(3) ;\
+echo "Downloading $${package}" ;\
+rm -f $(1) || true ;\
+GOBIN=$(LOCALBIN) go install $${package} ;\
+mv $(1) $(1)-$(3) ;\
+} ;\
+ln -sf $(1)-$(3) $(1)
+endef
diff --git a/PROJECT b/PROJECT
new file mode 100644
index 000000000..97a77c2eb
--- /dev/null
+++ b/PROJECT
@@ -0,0 +1,29 @@
+# Code generated by tool. DO NOT EDIT.
+# This file is used to track the info used to scaffold your project
+# and allow the plugins properly work.
+# More info: https://book.kubebuilder.io/reference/project-config.html
+domain: vllm.ai
+layout:
+- go.kubebuilder.io/v4
+projectName: production-stack
+repo: production-stack
+resources:
+- api:
+    crdVersion: v1
+    namespaced: true
+  controller: true
+  domain: vllm.ai
+  group: serving
+  kind: VLLMRuntime
+  path: production-stack/api/v1alpha1
+  version: v1alpha1
+- api:
+    crdVersion: v1
+    namespaced: true
+  controller: true
+  domain: vllm.ai
+  group: serving
+  kind: VLLMRouter
+  path: production-stack/api/v1alpha1
+  version: v1alpha1
+version: "3"
diff --git a/api/v1alpha1/groupversion_info.go b/api/v1alpha1/groupversion_info.go
new file mode 100644
index 000000000..9d3c2bf50
--- /dev/null
+++ b/api/v1alpha1/groupversion_info.go
@@ -0,0 +1,36 @@
+/*
+Copyright 2025.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package v1alpha1 contains API Schema definitions for the serving v1alpha1 API group.
+// +kubebuilder:object:generate=true
+// +groupName=serving.vllm.ai
+package v1alpha1
+
+import (
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"sigs.k8s.io/controller-runtime/pkg/scheme"
+)
+
+var (
+	// GroupVersion is the group and version (serving.vllm.ai/v1alpha1) used to register these objects.
+	GroupVersion = schema.GroupVersion{Group: "serving.vllm.ai", Version: "v1alpha1"}
+
+	// SchemeBuilder is used to add the Go types of this package to the scheme under GroupVersion.
+	SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}
+
+	// AddToScheme adds the types in this group-version to the given scheme.
+	AddToScheme = SchemeBuilder.AddToScheme
+)
diff --git a/api/v1alpha1/vllmrouter_types.go b/api/v1alpha1/vllmrouter_types.go
new file mode 100644
index 000000000..2bf632949
--- /dev/null
+++ b/api/v1alpha1/vllmrouter_types.go
@@ -0,0 +1,128 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1alpha1
+
+import (
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// EDIT THIS FILE!  THIS IS SCAFFOLDING FOR YOU TO OWN!
+// NOTE: json tags are required.  Any new fields you add must have json tags for the fields to be serialized.
+
+// VLLMRouterSpec defines the desired state of VLLMRouter.
+// +kubebuilder:validation:XValidation:rule="!has(self.serviceDiscovery) || self.serviceDiscovery != 'static' || (has(self.staticBackends) && self.staticBackends != '')",message="staticBackends is required when serviceDiscovery is static"
+// +kubebuilder:validation:XValidation:rule="!has(self.serviceDiscovery) || self.serviceDiscovery != 'static' || (has(self.staticModels) && self.staticModels != '')",message="staticModels is required when serviceDiscovery is static"
+// +kubebuilder:validation:XValidation:rule="!has(self.routingLogic) || self.routingLogic != 'session' || (has(self.sessionKey) && self.sessionKey != '')",message="sessionKey is required when routingLogic is session"
+type VLLMRouterSpec struct {
+	// EnableRouter determines if the router should be deployed
+	// +kubebuilder:default=true
+	EnableRouter bool `json:"enableRouter,omitempty"`
+
+	// Replicas specifies the number of router replicas
+	// +kubebuilder:default=1
+	Replicas int32 `json:"replicas,omitempty"`
+
+	// ServiceDiscovery specifies the service discovery method (k8s or static)
+	// +kubebuilder:validation:Enum=k8s;static
+	// +kubebuilder:default=k8s
+	ServiceDiscovery string `json:"serviceDiscovery,omitempty"`
+
+	// StaticBackends is required when using static service discovery
+	StaticBackends string `json:"staticBackends,omitempty"`
+
+	// StaticModels is required when using static service discovery
+	StaticModels string `json:"staticModels,omitempty"`
+
+	// RoutingLogic specifies the routing strategy
+	// +kubebuilder:validation:Enum=roundrobin;session
+	// +kubebuilder:default=roundrobin
+	RoutingLogic string `json:"routingLogic,omitempty"`
+
+	// SessionKey is the key for session-based routing; required when routingLogic is session
+	// +kubebuilder:default=""
+	SessionKey string `json:"sessionKey,omitempty"`
+
+	// EngineScrapeInterval for collecting engine statistics
+	EngineScrapeInterval string `json:"engineScrapeInterval,omitempty"`
+
+	// RequestStatsWindow for request statistics
+	RequestStatsWindow string `json:"requestStatsWindow,omitempty"`
+
+	// ExtraArgs for additional router arguments
+	ExtraArgs []string `json:"extraArgs,omitempty"`
+
+	// NodeSelectorTerms for pod scheduling
+	NodeSelectorTerms []corev1.NodeSelectorTerm `json:"nodeSelectorTerms,omitempty"`
+
+	// ServiceAccountName for the router pod
+	ServiceAccountName string `json:"serviceAccountName,omitempty"`
+
+	// Port is the container port for the router service
+	// +kubebuilder:default=80
+	Port int32 `json:"port,omitempty"`
+
+	// Image is the container image configuration
+	Image ImageSpec `json:"image"`
+
+	// Resources holds the resource requirements
+	Resources ResourceRequirements `json:"resources"`
+
+	// Env lists the environment variables
+	Env []EnvVar `json:"env,omitempty"`
+
+	// VLLMApiKeySecret references the secret used for the VLLM API key configuration
+	VLLMApiKeySecret corev1.LocalObjectReference `json:"vllmApiKeySecret,omitempty"`
+	VLLMApiKeyName   string                      `json:"vllmApiKeyName,omitempty"`
+}
+
+// VLLMRouterStatus defines the observed state of VLLMRouter.
+type VLLMRouterStatus struct {
+	// Status is the router status
+	Status string `json:"status,omitempty"`
+
+	// LastUpdated is the last-updated timestamp
+	LastUpdated metav1.Time `json:"lastUpdated,omitempty"`
+
+	// ActiveRuntimes is the number of active runtimes
+	ActiveRuntimes int32 `json:"activeRuntimes,omitempty"`
+}
+
+// +kubebuilder:object:root=true
+// +kubebuilder:subresource:status
+
+// VLLMRouter is the Schema for the vllmrouters API.
+type VLLMRouter struct {
+	metav1.TypeMeta   `json:",inline"`
+	metav1.ObjectMeta `json:"metadata,omitempty"`
+
+	Spec   VLLMRouterSpec   `json:"spec,omitempty"`   // desired state
+	Status VLLMRouterStatus `json:"status,omitempty"` // observed state
+}
+
+// +kubebuilder:object:root=true
+
+// VLLMRouterList contains a list of VLLMRouter objects.
+type VLLMRouterList struct {
+	metav1.TypeMeta `json:",inline"`
+	metav1.ListMeta `json:"metadata,omitempty"`
+	Items           []VLLMRouter `json:"items"`
+}
+
+func init() {
+	SchemeBuilder.Register(&VLLMRouter{}, &VLLMRouterList{}) // register both router kinds with the package SchemeBuilder
+}
diff --git a/api/v1alpha1/vllmruntime_types.go b/api/v1alpha1/vllmruntime_types.go
new file mode 100644
index 000000000..117a77463
--- /dev/null
+++ b/api/v1alpha1/vllmruntime_types.go
@@ -0,0 +1,184 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1alpha1
+
+import (
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// EDIT THIS FILE!  THIS IS SCAFFOLDING FOR YOU TO OWN!
+// NOTE: json tags are required.  Any new fields you add must have json tags for the fields to be serialized.
+
+// VLLMRuntimeSpec defines the desired state of VLLMRuntime.
+type VLLMRuntimeSpec struct {
+	// Model is the model configuration
+	Model ModelSpec `json:"model"`
+
+	// EnableChunkedPrefill enables chunked prefill
+	EnableChunkedPrefill bool `json:"enableChunkedPrefill,omitempty"`
+
+	// EnablePrefixCaching enables prefix caching
+	EnablePrefixCaching bool `json:"enablePrefixCaching,omitempty"`
+
+	// TensorParallelSize is the tensor parallel size
+	TensorParallelSize int32 `json:"tensorParallelSize,omitempty"`
+
+	// GpuMemoryUtilization is the GPU memory utilization
+	GpuMemoryUtilization string `json:"gpuMemoryUtilization,omitempty"`
+
+	// MaxLoras is the maximum number of LoRAs
+	MaxLoras int32 `json:"maxLoras,omitempty"`
+
+	// LMCacheConfig is the LM Cache configuration
+	LMCacheConfig LMCacheConfig `json:"lmCacheConfig,omitempty"`
+
+	// ExtraArgs holds extra arguments for vllm serve
+	ExtraArgs []string `json:"extraArgs,omitempty"`
+
+	// V1 selects use of the V1 API
+	V1 bool `json:"v1,omitempty"`
+
+	// Port is the port for the vLLM server
+	// +kubebuilder:default=8000
+	Port int32 `json:"port,omitempty"`
+
+	// Env lists the environment variables
+	Env []EnvVar `json:"env,omitempty"`
+
+	// Resources holds the resource requirements
+	Resources ResourceRequirements `json:"resources"`
+
+	// Image is the container image configuration
+	Image ImageSpec `json:"image"`
+
+	// HFTokenSecret references the HuggingFace token secret
+	HFTokenSecret corev1.LocalObjectReference `json:"hfTokenSecret,omitempty"`
+	// HFTokenName accompanies HFTokenSecret when a secret is set; it defaults to "token"
+	// +kubebuilder:default=token
+	HFTokenName string `json:"hfTokenName,omitempty"`
+
+	// Replicas is the number of replicas
+	// +kubebuilder:default=1
+	Replicas int32 `json:"replicas,omitempty"`
+
+	// DeployStrategy is the deployment strategy (RollingUpdate or Recreate)
+	// +kubebuilder:validation:Enum=RollingUpdate;Recreate
+	// +kubebuilder:default=RollingUpdate
+	DeployStrategy string `json:"deploymentStrategy,omitempty"`
+}
+
+// ModelSpec defines the model configuration.
+type ModelSpec struct {
+	// ModelURL is the model URL
+	ModelURL string `json:"modelURL"`
+
+	// EnableLoRA enables LoRA
+	EnableLoRA bool `json:"enableLoRA,omitempty"`
+
+	// EnableTool enables tool
+	EnableTool bool `json:"enableTool,omitempty"`
+
+	// ToolCallParser is the tool call parser
+	ToolCallParser string `json:"toolCallParser,omitempty"`
+
+	// MaxModelLen is the maximum model length
+	MaxModelLen int32 `json:"maxModelLen,omitempty"`
+
+	// DType is the data type
+	DType string `json:"dtype,omitempty"`
+
+	// MaxNumSeqs is the maximum number of sequences
+	MaxNumSeqs int32 `json:"maxNumSeqs,omitempty"`
+}
+
+// LMCacheConfig defines the LM Cache configuration.
+type LMCacheConfig struct {
+	// Enabled enables LM Cache (defaults to false)
+	// +kubebuilder:default=false
+	Enabled bool `json:"enabled,omitempty"`
+
+	// CPUOffloadingBufferSize is the size of the CPU offloading buffer (defaults to "4Gi")
+	// +kubebuilder:default="4Gi"
+	CPUOffloadingBufferSize string `json:"cpuOffloadingBufferSize,omitempty"`
+
+	// DiskOffloadingBufferSize is the size of the disk offloading buffer (defaults to "8Gi")
+	// +kubebuilder:default="8Gi"
+	DiskOffloadingBufferSize string `json:"diskOffloadingBufferSize,omitempty"`
+
+	// RemoteURL is the URL of the remote cache server
+	RemoteURL string `json:"remoteUrl,omitempty"`
+
+	// RemoteSerde is the serialization format for the remote cache
+	RemoteSerde string `json:"remoteSerde,omitempty"`
+}
+
+// EnvVar represents an environment variable as a name/value pair of strings.
+type EnvVar struct {
+	Name  string `json:"name"`  // name of the environment variable
+	Value string `json:"value"` // value of the environment variable
+}
+
+// ResourceRequirements defines the resource requirements; every value is carried as a string.
+type ResourceRequirements struct {
+	CPU    string `json:"cpu,omitempty"`    // CPU requirement; NOTE(review): presumably a Kubernetes quantity string — confirm
+	Memory string `json:"memory,omitempty"` // memory requirement; NOTE(review): presumably a Kubernetes quantity string — confirm
+	GPU    string `json:"gpu,omitempty"`    // GPU requirement; NOTE(review): presumably a device count — confirm
+}
+
+// ImageSpec defines the container image configuration.
+type ImageSpec struct {
+	Registry       string `json:"registry"`                 // image registry
+	Name           string `json:"name"`                     // image name
+	PullPolicy     string `json:"pullPolicy,omitempty"`     // image pull policy
+	PullSecretName string `json:"pullSecretName,omitempty"` // name of the image pull secret
+}
+
+// VLLMRuntimeStatus defines the observed state of VLLMRuntime.
+type VLLMRuntimeStatus struct {
+	// ModelStatus is the model status
+	ModelStatus string `json:"modelStatus,omitempty"`
+
+	// LastUpdated is the last-updated timestamp
+	LastUpdated metav1.Time `json:"lastUpdated,omitempty"`
+}
+
+// +kubebuilder:object:root=true
+// +kubebuilder:subresource:status
+// +kubebuilder:resource:shortName=vr
+
+// VLLMRuntime is the Schema for the vllmruntimes API (short name "vr").
+type VLLMRuntime struct {
+	metav1.TypeMeta   `json:",inline"`
+	metav1.ObjectMeta `json:"metadata,omitempty"`
+
+	Spec   VLLMRuntimeSpec   `json:"spec,omitempty"`   // desired state
+	Status VLLMRuntimeStatus `json:"status,omitempty"` // observed state
+}
+
+// +kubebuilder:object:root=true
+
+// VLLMRuntimeList contains a list of VLLMRuntime objects.
+type VLLMRuntimeList struct {
+	metav1.TypeMeta `json:",inline"`
+	metav1.ListMeta `json:"metadata,omitempty"`
+	Items           []VLLMRuntime `json:"items"`
+}
+
+func init() {
+	SchemeBuilder.Register(&VLLMRuntime{}, &VLLMRuntimeList{}) // register both runtime kinds with the package SchemeBuilder
+}
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
new file mode 100644
index 000000000..51e91ba0a
--- /dev/null
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@@ -0,0 +1,316 @@
+//go:build !ignore_autogenerated
+
+/*
+Copyright 2025.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Code generated by controller-gen. DO NOT EDIT.
+
+package v1alpha1
+
+import (
+	"k8s.io/api/core/v1"
+	runtime "k8s.io/apimachinery/pkg/runtime"
+)
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *EnvVar) DeepCopyInto(out *EnvVar) {
+	*out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EnvVar.
+func (in *EnvVar) DeepCopy() *EnvVar {
+	if in == nil {
+		return nil
+	}
+	out := new(EnvVar)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ImageSpec) DeepCopyInto(out *ImageSpec) {
+	*out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ImageSpec.
+func (in *ImageSpec) DeepCopy() *ImageSpec {
+	if in == nil {
+		return nil
+	}
+	out := new(ImageSpec)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *LMCacheConfig) DeepCopyInto(out *LMCacheConfig) {
+	*out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LMCacheConfig.
+func (in *LMCacheConfig) DeepCopy() *LMCacheConfig {
+	if in == nil {
+		return nil
+	}
+	out := new(LMCacheConfig)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ModelSpec) DeepCopyInto(out *ModelSpec) {
+	*out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelSpec.
+func (in *ModelSpec) DeepCopy() *ModelSpec {
+	if in == nil {
+		return nil
+	}
+	out := new(ModelSpec)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ResourceRequirements) DeepCopyInto(out *ResourceRequirements) {
+	*out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ResourceRequirements.
+func (in *ResourceRequirements) DeepCopy() *ResourceRequirements {
+	if in == nil {
+		return nil
+	}
+	out := new(ResourceRequirements)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *VLLMRouter) DeepCopyInto(out *VLLMRouter) {
+	*out = *in
+	out.TypeMeta = in.TypeMeta
+	in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
+	in.Spec.DeepCopyInto(&out.Spec)
+	in.Status.DeepCopyInto(&out.Status)
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VLLMRouter.
+func (in *VLLMRouter) DeepCopy() *VLLMRouter {
+	if in == nil {
+		return nil
+	}
+	out := new(VLLMRouter)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *VLLMRouter) DeepCopyObject() runtime.Object {
+	if c := in.DeepCopy(); c != nil {
+		return c
+	}
+	return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *VLLMRouterList) DeepCopyInto(out *VLLMRouterList) {
+	*out = *in
+	out.TypeMeta = in.TypeMeta
+	in.ListMeta.DeepCopyInto(&out.ListMeta)
+	if in.Items != nil {
+		in, out := &in.Items, &out.Items
+		*out = make([]VLLMRouter, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VLLMRouterList.
+func (in *VLLMRouterList) DeepCopy() *VLLMRouterList {
+	if in == nil {
+		return nil
+	}
+	out := new(VLLMRouterList)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *VLLMRouterList) DeepCopyObject() runtime.Object {
+	if c := in.DeepCopy(); c != nil {
+		return c
+	}
+	return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *VLLMRouterSpec) DeepCopyInto(out *VLLMRouterSpec) {
+	*out = *in
+	if in.ExtraArgs != nil {
+		in, out := &in.ExtraArgs, &out.ExtraArgs
+		*out = make([]string, len(*in))
+		copy(*out, *in)
+	}
+	if in.NodeSelectorTerms != nil {
+		in, out := &in.NodeSelectorTerms, &out.NodeSelectorTerms
+		*out = make([]v1.NodeSelectorTerm, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+	out.Image = in.Image
+	out.Resources = in.Resources
+	if in.Env != nil {
+		in, out := &in.Env, &out.Env
+		*out = make([]EnvVar, len(*in))
+		copy(*out, *in)
+	}
+	out.VLLMApiKeySecret = in.VLLMApiKeySecret
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VLLMRouterSpec.
+func (in *VLLMRouterSpec) DeepCopy() *VLLMRouterSpec {
+	if in == nil {
+		return nil
+	}
+	out := new(VLLMRouterSpec)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *VLLMRouterStatus) DeepCopyInto(out *VLLMRouterStatus) {
+	*out = *in
+	in.LastUpdated.DeepCopyInto(&out.LastUpdated)
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VLLMRouterStatus.
+func (in *VLLMRouterStatus) DeepCopy() *VLLMRouterStatus {
+	if in == nil {
+		return nil
+	}
+	out := new(VLLMRouterStatus)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *VLLMRuntime) DeepCopyInto(out *VLLMRuntime) {
+	*out = *in
+	out.TypeMeta = in.TypeMeta
+	in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
+	in.Spec.DeepCopyInto(&out.Spec)
+	in.Status.DeepCopyInto(&out.Status)
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VLLMRuntime.
+func (in *VLLMRuntime) DeepCopy() *VLLMRuntime {
+	if in == nil {
+		return nil
+	}
+	out := new(VLLMRuntime)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *VLLMRuntime) DeepCopyObject() runtime.Object {
+	if c := in.DeepCopy(); c != nil {
+		return c
+	}
+	return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *VLLMRuntimeList) DeepCopyInto(out *VLLMRuntimeList) {
+	*out = *in
+	out.TypeMeta = in.TypeMeta
+	in.ListMeta.DeepCopyInto(&out.ListMeta)
+	if in.Items != nil {
+		in, out := &in.Items, &out.Items
+		*out = make([]VLLMRuntime, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VLLMRuntimeList.
+func (in *VLLMRuntimeList) DeepCopy() *VLLMRuntimeList {
+	if in == nil {
+		return nil
+	}
+	out := new(VLLMRuntimeList)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *VLLMRuntimeList) DeepCopyObject() runtime.Object {
+	if c := in.DeepCopy(); c != nil {
+		return c
+	}
+	return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *VLLMRuntimeSpec) DeepCopyInto(out *VLLMRuntimeSpec) {
+	*out = *in
+	out.Model = in.Model
+	out.LMCacheConfig = in.LMCacheConfig
+	if in.ExtraArgs != nil {
+		in, out := &in.ExtraArgs, &out.ExtraArgs
+		*out = make([]string, len(*in))
+		copy(*out, *in)
+	}
+	if in.Env != nil {
+		in, out := &in.Env, &out.Env
+		*out = make([]EnvVar, len(*in))
+		copy(*out, *in)
+	}
+	out.Resources = in.Resources
+	out.Image = in.Image
+	out.HFTokenSecret = in.HFTokenSecret
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VLLMRuntimeSpec.
+func (in *VLLMRuntimeSpec) DeepCopy() *VLLMRuntimeSpec {
+	if in == nil {
+		return nil
+	}
+	out := new(VLLMRuntimeSpec)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *VLLMRuntimeStatus) DeepCopyInto(out *VLLMRuntimeStatus) {
+	*out = *in
+	in.LastUpdated.DeepCopyInto(&out.LastUpdated)
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VLLMRuntimeStatus.
+func (in *VLLMRuntimeStatus) DeepCopy() *VLLMRuntimeStatus {
+	if in == nil {
+		return nil
+	}
+	out := new(VLLMRuntimeStatus)
+	in.DeepCopyInto(out)
+	return out
+}
diff --git a/cmd/main.go b/cmd/main.go
new file mode 100644
index 000000000..94c41e44b
--- /dev/null
+++ b/cmd/main.go
@@ -0,0 +1,252 @@
+/*
+Copyright 2025.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package main
+
+import (
+	"crypto/tls"
+	"flag"
+	"os"
+	"path/filepath"
+
+	// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
+	// to ensure that exec-entrypoint and run can make use of them.
+	_ "k8s.io/client-go/plugin/pkg/client/auth"
+
+	"k8s.io/apimachinery/pkg/runtime"
+	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
+	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/certwatcher"
+	"sigs.k8s.io/controller-runtime/pkg/healthz"
+	"sigs.k8s.io/controller-runtime/pkg/log/zap"
+	"sigs.k8s.io/controller-runtime/pkg/metrics/filters"
+	metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
+	"sigs.k8s.io/controller-runtime/pkg/webhook"
+
+	servingv1alpha1 "production-stack/api/v1alpha1"
+	"production-stack/internal/controller"
+	// +kubebuilder:scaffold:imports
+)
+
+var (
+	scheme   = runtime.NewScheme()
+	setupLog = ctrl.Log.WithName("setup")
+)
+
+func init() {
+	utilruntime.Must(clientgoscheme.AddToScheme(scheme))
+
+	utilruntime.Must(servingv1alpha1.AddToScheme(scheme))
+	// +kubebuilder:scaffold:scheme
+}
+
+// nolint:gocyclo
+func main() {
+	var metricsAddr string
+	var metricsCertPath, metricsCertName, metricsCertKey string
+	var webhookCertPath, webhookCertName, webhookCertKey string
+	var enableLeaderElection bool
+	var probeAddr string
+	var secureMetrics bool
+	var enableHTTP2 bool
+	var tlsOpts []func(*tls.Config)
+	flag.StringVar(&metricsAddr, "metrics-bind-address", "0", "The address the metrics endpoint binds to. "+
+		"Use :8443 for HTTPS or :8080 for HTTP, or leave as 0 to disable the metrics service.")
+	flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
+	flag.BoolVar(&enableLeaderElection, "leader-elect", false,
+		"Enable leader election for controller manager. "+
+			"Enabling this will ensure there is only one active controller manager.")
+	flag.BoolVar(&secureMetrics, "metrics-secure", true,
+		"If set, the metrics endpoint is served securely via HTTPS. Use --metrics-secure=false to use HTTP instead.")
+	flag.StringVar(&webhookCertPath, "webhook-cert-path", "", "The directory that contains the webhook certificate.")
+	flag.StringVar(&webhookCertName, "webhook-cert-name", "tls.crt", "The name of the webhook certificate file.")
+	flag.StringVar(&webhookCertKey, "webhook-cert-key", "tls.key", "The name of the webhook key file.")
+	flag.StringVar(&metricsCertPath, "metrics-cert-path", "",
+		"The directory that contains the metrics server certificate.")
+	flag.StringVar(&metricsCertName, "metrics-cert-name", "tls.crt", "The name of the metrics server certificate file.")
+	flag.StringVar(&metricsCertKey, "metrics-cert-key", "tls.key", "The name of the metrics server key file.")
+	flag.BoolVar(&enableHTTP2, "enable-http2", false,
+		"If set, HTTP/2 will be enabled for the metrics and webhook servers")
+	opts := zap.Options{
+		Development: true,
+	}
+	opts.BindFlags(flag.CommandLine)
+	flag.Parse()
+
+	ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))
+
+	// If the enable-http2 flag is false (the default), HTTP/2 is disabled
+	// because of its known vulnerabilities. More specifically, disabling HTTP/2
+	// protects the servers against the HTTP/2 Stream Cancellation and
+	// Rapid Reset CVEs. For more information see:
+	// - https://github.com/advisories/GHSA-qppj-fm5r-hxr3
+	// - https://github.com/advisories/GHSA-4374-p667-p6c8
+	disableHTTP2 := func(c *tls.Config) {
+		setupLog.Info("disabling http/2")
+		c.NextProtos = []string{"http/1.1"}
+	}
+
+	if !enableHTTP2 {
+		tlsOpts = append(tlsOpts, disableHTTP2)
+	}
+
+	// Create watchers for metrics and webhooks certificates
+	var metricsCertWatcher, webhookCertWatcher *certwatcher.CertWatcher
+
+	// Initial webhook TLS options
+	webhookTLSOpts := tlsOpts
+
+	if len(webhookCertPath) > 0 {
+		setupLog.Info("Initializing webhook certificate watcher using provided certificates",
+			"webhook-cert-path", webhookCertPath, "webhook-cert-name", webhookCertName, "webhook-cert-key", webhookCertKey)
+
+		var err error
+		webhookCertWatcher, err = certwatcher.New(
+			filepath.Join(webhookCertPath, webhookCertName),
+			filepath.Join(webhookCertPath, webhookCertKey),
+		)
+		if err != nil {
+			setupLog.Error(err, "Failed to initialize webhook certificate watcher")
+			os.Exit(1)
+		}
+
+		webhookTLSOpts = append(webhookTLSOpts, func(config *tls.Config) {
+			config.GetCertificate = webhookCertWatcher.GetCertificate
+		})
+	}
+
+	webhookServer := webhook.NewServer(webhook.Options{
+		TLSOpts: webhookTLSOpts,
+	})
+
+	// Metrics endpoint is enabled in 'config/default/kustomization.yaml'. The Metrics options configure the server.
+	// More info:
+	// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.20.4/pkg/metrics/server
+	// - https://book.kubebuilder.io/reference/metrics.html
+	metricsServerOptions := metricsserver.Options{
+		BindAddress:   metricsAddr,
+		SecureServing: secureMetrics,
+		TLSOpts:       tlsOpts,
+	}
+
+	if secureMetrics {
+		// FilterProvider is used to protect the metrics endpoint with authn/authz.
+		// These configurations ensure that only authorized users and service accounts
+		// can access the metrics endpoint. The RBAC are configured in 'config/rbac/kustomization.yaml'. More info:
+		// https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.20.4/pkg/metrics/filters#WithAuthenticationAndAuthorization
+		metricsServerOptions.FilterProvider = filters.WithAuthenticationAndAuthorization
+	}
+
+	// If the certificate is not specified, controller-runtime will automatically
+	// generate self-signed certificates for the metrics server. While convenient for development and testing,
+	// this setup is not recommended for production.
+	//
+	// TODO(user): If you enable certManager, uncomment the following lines:
+	// - [METRICS-WITH-CERTS] at config/default/kustomization.yaml to generate and use certificates
+	// managed by cert-manager for the metrics server.
+	// - [PROMETHEUS-WITH-CERTS] at config/prometheus/kustomization.yaml for TLS certification.
+	if len(metricsCertPath) > 0 {
+		setupLog.Info("Initializing metrics certificate watcher using provided certificates",
+			"metrics-cert-path", metricsCertPath, "metrics-cert-name", metricsCertName, "metrics-cert-key", metricsCertKey)
+
+		var err error
+		metricsCertWatcher, err = certwatcher.New(
+			filepath.Join(metricsCertPath, metricsCertName),
+			filepath.Join(metricsCertPath, metricsCertKey),
+		)
+		if err != nil {
+			setupLog.Error(err, "Failed to initialize metrics certificate watcher")
+			os.Exit(1)
+		}
+
+		metricsServerOptions.TLSOpts = append(metricsServerOptions.TLSOpts, func(config *tls.Config) {
+			config.GetCertificate = metricsCertWatcher.GetCertificate
+		})
+	}
+
+	mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
+		Scheme:                 scheme,
+		Metrics:                metricsServerOptions,
+		WebhookServer:          webhookServer,
+		HealthProbeBindAddress: probeAddr,
+		LeaderElection:         enableLeaderElection,
+		LeaderElectionID:       "4549d26f.vllm.ai",
+		// LeaderElectionReleaseOnCancel defines if the leader should step down voluntarily
+		// when the Manager ends. This requires the binary to immediately end when the
+		// Manager is stopped, otherwise, this setting is unsafe. Setting this significantly
+		// speeds up voluntary leader transitions as the new leader doesn't have to wait
+		// LeaseDuration time first.
+		//
+		// In the default scaffold provided, the program ends immediately after
+		// the manager stops, so it would be fine to enable this option. However,
+		// if you are doing or intend to do any operation, such as performing cleanups,
+		// after the manager stops, then its usage might be unsafe.
+		// LeaderElectionReleaseOnCancel: true,
+	})
+	if err != nil {
+		setupLog.Error(err, "unable to start manager")
+		os.Exit(1)
+	}
+
+	if err = (&controller.VLLMRouterReconciler{
+		Client: mgr.GetClient(),
+		Scheme: mgr.GetScheme(),
+	}).SetupWithManager(mgr); err != nil {
+		setupLog.Error(err, "unable to create controller", "controller", "VLLMRouter")
+		os.Exit(1)
+	}
+
+	if err = (&controller.VLLMRuntimeReconciler{
+		Client: mgr.GetClient(),
+		Scheme: mgr.GetScheme(),
+	}).SetupWithManager(mgr); err != nil {
+		setupLog.Error(err, "unable to create controller", "controller", "VLLMRuntime")
+		os.Exit(1)
+	}
+	// +kubebuilder:scaffold:builder
+
+	if metricsCertWatcher != nil {
+		setupLog.Info("Adding metrics certificate watcher to manager")
+		if err := mgr.Add(metricsCertWatcher); err != nil {
+			setupLog.Error(err, "unable to add metrics certificate watcher to manager")
+			os.Exit(1)
+		}
+	}
+
+	if webhookCertWatcher != nil {
+		setupLog.Info("Adding webhook certificate watcher to manager")
+		if err := mgr.Add(webhookCertWatcher); err != nil {
+			setupLog.Error(err, "unable to add webhook certificate watcher to manager")
+			os.Exit(1)
+		}
+	}
+
+	if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
+		setupLog.Error(err, "unable to set up health check")
+		os.Exit(1)
+	}
+	if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
+		setupLog.Error(err, "unable to set up ready check")
+		os.Exit(1)
+	}
+
+	setupLog.Info("starting manager")
+	if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
+		setupLog.Error(err, "problem running manager")
+		os.Exit(1)
+	}
+}
diff --git a/config/crd/bases/production-stack.vllm.ai_staticroutes.yaml b/config/crd/bases/production-stack.vllm.ai_staticroutes.yaml
new file mode 100644
index 000000000..cd6dd48bd
--- /dev/null
+++ b/config/crd/bases/production-stack.vllm.ai_staticroutes.yaml
@@ -0,0 +1,218 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.17.2
+  name: staticroutes.production-stack.vllm.ai
+spec:
+  group: production-stack.vllm.ai
+  names:
+    kind: StaticRoute
+    listKind: StaticRouteList
+    plural: staticroutes
+    singular: staticroute
+  scope: Namespaced
+  versions:
+  - name: v1alpha1
+    schema:
+      openAPIV3Schema:
+        description: StaticRoute is the Schema for the staticroutes API
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: StaticRouteSpec defines the desired state of StaticRoute
+            properties:
+              configMapName:
+                description: ConfigMapName is the name of the ConfigMap to create
+                  with the dynamic config
+                type: string
+              healthCheck:
+                description: HealthCheck defines the health check configuration for
+                  the router
+                properties:
+                  failureThreshold:
+                    default: 3
+                    description: Minimum consecutive failures for the probe to be
+                      considered failed
+                    format: int32
+                    minimum: 1
+                    type: integer
+                  periodSeconds:
+                    default: 10
+                    description: Number of seconds between probe attempts
+                    format: int32
+                    minimum: 1
+                    type: integer
+                  successThreshold:
+                    default: 1
+                    description: Minimum consecutive successes for the probe to be
+                      considered successful
+                    format: int32
+                    minimum: 1
+                    type: integer
+                  timeoutSeconds:
+                    default: 5
+                    description: Number of seconds after which the probe times out
+                    format: int32
+                    minimum: 1
+                    type: integer
+                type: object
+              routerRef:
+                description: RouterRef is a reference to the router service
+                properties:
+                  apiVersion:
+                    description: API version of the referent.
+                    type: string
+                  fieldPath:
+                    description: |-
+                      If referring to a piece of an object instead of an entire object, this string
+                      should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2].
+                      For example, if the object reference is to a container within a pod, this would take on a value like:
+                      "spec.containers{name}" (where "name" refers to the name of the container that triggered
+                      the event) or if no container name is specified "spec.containers[2]" (container with
+                      index 2 in this pod). This syntax is chosen only to have some well-defined way of
+                      referencing a part of an object.
+                    type: string
+                  kind:
+                    description: |-
+                      Kind of the referent.
+                      More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+                    type: string
+                  name:
+                    description: |-
+                      Name of the referent.
+                      More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+                    type: string
+                  namespace:
+                    description: |-
+                      Namespace of the referent.
+                      More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
+                    type: string
+                  resourceVersion:
+                    description: |-
+                      Specific resourceVersion to which this reference is made, if any.
+                      More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency
+                    type: string
+                  uid:
+                    description: |-
+                      UID of the referent.
+                      More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids
+                    type: string
+                type: object
+                x-kubernetes-map-type: atomic
+              routingLogic:
+                default: roundrobin
+                description: RoutingLogic specifies the routing logic to use
+                enum:
+                - roundrobin
+                type: string
+              serviceDiscovery:
+                default: static
+                description: ServiceDiscovery specifies the service discovery method
+                enum:
+                - static
+                type: string
+              staticBackends:
+                description: StaticBackends is a comma-separated list of backend URLs
+                type: string
+              staticModels:
+                description: StaticModels is a comma-separated list of model names
+                type: string
+            required:
+            - routingLogic
+            - serviceDiscovery
+            - staticBackends
+            - staticModels
+            type: object
+          status:
+            description: StaticRouteStatus defines the observed state of StaticRoute
+            properties:
+              conditions:
+                description: Conditions represent the latest available observations
+                  of the StaticRoute's state
+                items:
+                  description: Condition contains details for one aspect of the current
+                    state of this API Resource.
+                  properties:
+                    lastTransitionTime:
+                      description: |-
+                        lastTransitionTime is the last time the condition transitioned from one status to another.
+                        This should be when the underlying condition changed.  If that is not known, then using the time when the API field changed is acceptable.
+                      format: date-time
+                      type: string
+                    message:
+                      description: |-
+                        message is a human readable message indicating details about the transition.
+                        This may be an empty string.
+                      maxLength: 32768
+                      type: string
+                    observedGeneration:
+                      description: |-
+                        observedGeneration represents the .metadata.generation that the condition was set based upon.
+                        For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+                        with respect to the current state of the instance.
+                      format: int64
+                      minimum: 0
+                      type: integer
+                    reason:
+                      description: |-
+                        reason contains a programmatic identifier indicating the reason for the condition's last transition.
+                        Producers of specific condition types may define expected values and meanings for this field,
+                        and whether the values are considered a guaranteed API.
+                        The value should be a CamelCase string.
+                        This field may not be empty.
+                      maxLength: 1024
+                      minLength: 1
+                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+                      type: string
+                    status:
+                      description: status of the condition, one of True, False, Unknown.
+                      enum:
+                      - "True"
+                      - "False"
+                      - Unknown
+                      type: string
+                    type:
+                      description: type of condition in CamelCase or in foo.example.com/CamelCase.
+                      maxLength: 316
+                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+                      type: string
+                  required:
+                  - lastTransitionTime
+                  - message
+                  - reason
+                  - status
+                  - type
+                  type: object
+                type: array
+              configMapRef:
+                description: ConfigMapRef is a reference to the created ConfigMap
+                type: string
+              lastAppliedTime:
+                description: LastAppliedTime is the last time the configuration was
+                  applied to the router
+                format: date-time
+                type: string
+            type: object
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}
diff --git a/config/crd/bases/serving.vllm.ai_vllmrouters.yaml b/config/crd/bases/serving.vllm.ai_vllmrouters.yaml
new file mode 100644
index 000000000..7445c2e4c
--- /dev/null
+++ b/config/crd/bases/serving.vllm.ai_vllmrouters.yaml
@@ -0,0 +1,251 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.17.2
+  name: vllmrouters.serving.vllm.ai
+spec:
+  group: serving.vllm.ai
+  names:
+    kind: VLLMRouter
+    listKind: VLLMRouterList
+    plural: vllmrouters
+    singular: vllmrouter
+  scope: Namespaced
+  versions:
+  - name: v1alpha1
+    schema:
+      openAPIV3Schema:
+        description: VLLMRouter is the Schema for the vllmrouters API
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: VLLMRouterSpec defines the desired state of VLLMRouter
+            properties:
+              enableRouter:
+                default: true
+                description: EnableRouter determines if the router should be deployed
+                type: boolean
+              engineScrapeInterval:
+                description: EngineScrapeInterval for collecting engine statistics
+                type: string
+              env:
+                description: Environment variables
+                items:
+                  description: EnvVar represents an environment variable
+                  properties:
+                    name:
+                      type: string
+                    value:
+                      type: string
+                  required:
+                  - name
+                  - value
+                  type: object
+                type: array
+              extraArgs:
+                description: ExtraArgs for additional router arguments
+                items:
+                  type: string
+                type: array
+              image:
+                description: Image configuration
+                properties:
+                  name:
+                    type: string
+                  pullPolicy:
+                    type: string
+                  pullSecretName:
+                    type: string
+                  registry:
+                    type: string
+                required:
+                - name
+                - registry
+                type: object
+              nodeSelectorTerms:
+                description: NodeSelectorTerms for pod scheduling
+                items:
+                  description: |-
+                    A null or empty node selector term matches no objects. The requirements of
+                    them are ANDed.
+                    The TopologySelectorTerm type implements a subset of the NodeSelectorTerm.
+                  properties:
+                    matchExpressions:
+                      description: A list of node selector requirements by node's
+                        labels.
+                      items:
+                        description: |-
+                          A node selector requirement is a selector that contains values, a key, and an operator
+                          that relates the key and values.
+                        properties:
+                          key:
+                            description: The label key that the selector applies to.
+                            type: string
+                          operator:
+                            description: |-
+                              Represents a key's relationship to a set of values.
+                              Valid operators are In, Not In, Exists, DoesNotExist. Gt, and Lt.
+                            type: string
+                          values:
+                            description: |-
+                              An array of string values. If the operator is In or Not In,
+                              the values array must be non-empty. If the operator is Exists or DoesNotExist,
+                              the values array must be empty. If the operator is Gt or Lt, the values
+                              array must have a single element, which will be interpreted as an integer.
+                              This array is replaced during a strategic merge patch.
+                            items:
+                              type: string
+                            type: array
+                            x-kubernetes-list-type: atomic
+                        required:
+                        - key
+                        - operator
+                        type: object
+                      type: array
+                      x-kubernetes-list-type: atomic
+                    matchFields:
+                      description: A list of node selector requirements by node's
+                        fields.
+                      items:
+                        description: |-
+                          A node selector requirement is a selector that contains values, a key, and an operator
+                          that relates the key and values.
+                        properties:
+                          key:
+                            description: The label key that the selector applies to.
+                            type: string
+                          operator:
+                            description: |-
+                              Represents a key's relationship to a set of values.
+                              Valid operators are In, Not In, Exists, DoesNotExist. Gt, and Lt.
+                            type: string
+                          values:
+                            description: |-
+                              An array of string values. If the operator is In or Not In,
+                              the values array must be non-empty. If the operator is Exists or DoesNotExist,
+                              the values array must be empty. If the operator is Gt or Lt, the values
+                              array must have a single element, which will be interpreted as an integer.
+                              This array is replaced during a strategic merge patch.
+                            items:
+                              type: string
+                            type: array
+                            x-kubernetes-list-type: atomic
+                        required:
+                        - key
+                        - operator
+                        type: object
+                      type: array
+                      x-kubernetes-list-type: atomic
+                  type: object
+                  x-kubernetes-map-type: atomic
+                type: array
+              port:
+                default: 80
+                description: ContainerPort for the router service
+                format: int32
+                type: integer
+              replicas:
+                default: 1
+                description: Replicas specifies the number of router replicas
+                format: int32
+                type: integer
+              requestStatsWindow:
+                description: RequestStatsWindow for request statistics
+                type: string
+              resources:
+                description: Resource requirements
+                properties:
+                  cpu:
+                    type: string
+                  gpu:
+                    type: string
+                  memory:
+                    type: string
+                type: object
+              routingLogic:
+                default: roundrobin
+                description: RoutingLogic specifies the routing strategy
+                enum:
+                - roundrobin
+                - session
+                type: string
+              serviceAccountName:
+                description: ServiceAccountName for the router pod
+                type: string
+              serviceDiscovery:
+                default: k8s
+                description: ServiceDiscovery specifies the service discovery method
+                  (k8s or static)
+                enum:
+                - k8s
+                - static
+                type: string
+              sessionKey:
+                default: ""
+                description: SessionKey for session-based routing
+                type: string
+              staticBackends:
+                description: StaticBackends is required when using static service
+                  discovery
+                type: string
+              staticModels:
+                description: StaticModels is required when using static service discovery
+                type: string
+              vllmApiKeyName:
+                type: string
+              vllmApiKeySecret:
+                description: VLLM API Key configuration
+                properties:
+                  name:
+                    default: ""
+                    description: |-
+                      Name of the referent.
+                      This field is effectively required, but due to backwards compatibility is
+                      allowed to be empty. Instances of this type with an empty value here are
+                      almost certainly wrong.
+                      More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+                    type: string
+                type: object
+                x-kubernetes-map-type: atomic
+            required:
+            - image
+            - resources
+            type: object
+          status:
+            description: VLLMRouterStatus defines the observed state of VLLMRouter
+            properties:
+              activeRuntimes:
+                description: Number of active runtimes
+                format: int32
+                type: integer
+              lastUpdated:
+                description: Last updated timestamp
+                format: date-time
+                type: string
+              status:
+                description: Router status
+                type: string
+            type: object
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}
diff --git a/config/crd/bases/serving.vllm.ai_vllmruntimes.yaml b/config/crd/bases/serving.vllm.ai_vllmruntimes.yaml
new file mode 100644
index 000000000..311dc82a8
--- /dev/null
+++ b/config/crd/bases/serving.vllm.ai_vllmruntimes.yaml
@@ -0,0 +1,216 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.17.2
+  name: vllmruntimes.serving.vllm.ai
+spec:
+  group: serving.vllm.ai
+  names:
+    kind: VLLMRuntime
+    listKind: VLLMRuntimeList
+    plural: vllmruntimes
+    shortNames:
+    - vr
+    singular: vllmruntime
+  scope: Namespaced
+  versions:
+  - name: v1alpha1
+    schema:
+      openAPIV3Schema:
+        description: VLLMRuntime is the Schema for the vllmruntimes API
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: VLLMRuntimeSpec defines the desired state of VLLMRuntime
+            properties:
+              deploymentStrategy:
+                default: RollingUpdate
+                description: Deploy strategy
+                enum:
+                - RollingUpdate
+                - Recreate
+                type: string
+              enableChunkedPrefill:
+                description: Enable chunked prefill
+                type: boolean
+              enablePrefixCaching:
+                description: Enable prefix caching
+                type: boolean
+              env:
+                description: Environment variables
+                items:
+                  description: EnvVar represents an environment variable
+                  properties:
+                    name:
+                      type: string
+                    value:
+                      type: string
+                  required:
+                  - name
+                  - value
+                  type: object
+                type: array
+              extraArgs:
+                description: Extra arguments for vllm serve
+                items:
+                  type: string
+                type: array
+              gpuMemoryUtilization:
+                description: GPU memory utilization
+                type: string
+              hfTokenName:
+                default: token
+                type: string
+              hfTokenSecret:
+                description: HuggingFace token secret
+                properties:
+                  name:
+                    default: ""
+                    description: |-
+                      Name of the referent.
+                      This field is effectively required, but due to backwards compatibility is
+                      allowed to be empty. Instances of this type with an empty value here are
+                      almost certainly wrong.
+                      More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+                    type: string
+                type: object
+                x-kubernetes-map-type: atomic
+              image:
+                description: Image configuration
+                properties:
+                  name:
+                    type: string
+                  pullPolicy:
+                    type: string
+                  pullSecretName:
+                    type: string
+                  registry:
+                    type: string
+                required:
+                - name
+                - registry
+                type: object
+              lmCacheConfig:
+                description: LM Cache configuration
+                properties:
+                  cpuOffloadingBufferSize:
+                    default: 4Gi
+                    description: CPUOffloadingBufferSize is the size of the CPU offloading
+                      buffer
+                    type: string
+                  diskOffloadingBufferSize:
+                    default: 8Gi
+                    description: DiskOffloadingBufferSize is the size of the disk
+                      offloading buffer
+                    type: string
+                  enabled:
+                    default: false
+                    description: Enabled enables LM Cache
+                    type: boolean
+                  remoteSerde:
+                    description: RemoteSerde is the serialization format for the remote
+                      cache
+                    type: string
+                  remoteUrl:
+                    description: RemoteURL is the URL of the remote cache server
+                    type: string
+                type: object
+              maxLoras:
+                description: Maximum number of LoRAs
+                format: int32
+                type: integer
+              model:
+                description: Model configuration
+                properties:
+                  dtype:
+                    description: Data type
+                    type: string
+                  enableLoRA:
+                    description: Enable LoRA
+                    type: boolean
+                  enableTool:
+                    description: Enable tool
+                    type: boolean
+                  maxModelLen:
+                    description: Maximum model length
+                    format: int32
+                    type: integer
+                  maxNumSeqs:
+                    description: Maximum number of sequences
+                    format: int32
+                    type: integer
+                  modelURL:
+                    description: Model URL
+                    type: string
+                  toolCallParser:
+                    description: Tool call parser
+                    type: string
+                required:
+                - modelURL
+                type: object
+              port:
+                default: 8000
+                description: Port for vLLM server
+                format: int32
+                type: integer
+              replicas:
+                default: 1
+                description: Replicas
+                format: int32
+                type: integer
+              resources:
+                description: Resource requirements
+                properties:
+                  cpu:
+                    type: string
+                  gpu:
+                    type: string
+                  memory:
+                    type: string
+                type: object
+              tensorParallelSize:
+                description: Tensor parallel size
+                format: int32
+                type: integer
+              v1:
+                description: Use V1 API
+                type: boolean
+            required:
+            - image
+            - model
+            - resources
+            type: object
+          status:
+            description: VLLMRuntimeStatus defines the observed state of VLLMRuntime
+            properties:
+              lastUpdated:
+                description: Last updated timestamp
+                format: date-time
+                type: string
+              modelStatus:
+                description: Model status
+                type: string
+            type: object
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}
diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml
new file mode 100644
index 000000000..500f2b34a
--- /dev/null
+++ b/config/crd/kustomization.yaml
@@ -0,0 +1,17 @@
+# This kustomization.yaml is not intended to be run by itself,
+# since it depends on service name and namespace that are out of this kustomize package.
+# It should be run by config/default
+resources:
+- bases/serving.vllm.ai_vllmruntimes.yaml
+- bases/serving.vllm.ai_vllmrouters.yaml
+# +kubebuilder:scaffold:crdkustomizeresource
+
+patches:
+# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix.
+# patches here are for enabling the conversion webhook for each CRD
+# +kubebuilder:scaffold:crdkustomizewebhookpatch
+
+# [WEBHOOK] To enable webhook, uncomment the following section
+# the following config is for teaching kustomize how to do kustomization for CRDs.
+#configurations:
+#- kustomizeconfig.yaml
diff --git a/config/crd/kustomizeconfig.yaml b/config/crd/kustomizeconfig.yaml
new file mode 100644
index 000000000..ec5c150a9
--- /dev/null
+++ b/config/crd/kustomizeconfig.yaml
@@ -0,0 +1,19 @@
+# This file is for teaching kustomize how to substitute name and namespace reference in CRD
+nameReference:
+- kind: Service
+  version: v1
+  fieldSpecs:
+  - kind: CustomResourceDefinition
+    version: v1
+    group: apiextensions.k8s.io
+    path: spec/conversion/webhook/clientConfig/service/name
+
+namespace:
+- kind: CustomResourceDefinition
+  version: v1
+  group: apiextensions.k8s.io
+  path: spec/conversion/webhook/clientConfig/service/namespace
+  create: false
+
+varReference:
+- path: metadata/annotations
diff --git a/config/default/cert_metrics_manager_patch.yaml b/config/default/cert_metrics_manager_patch.yaml
new file mode 100644
index 000000000..d97501553
--- /dev/null
+++ b/config/default/cert_metrics_manager_patch.yaml
@@ -0,0 +1,30 @@
+# This patch adds the arg, volume, and volumeMount that allow the manager to use the metrics-server certs.
+
+# Add the volumeMount for the metrics-server certs
+- op: add
+  path: /spec/template/spec/containers/0/volumeMounts/-
+  value:
+    mountPath: /tmp/k8s-metrics-server/metrics-certs
+    name: metrics-certs
+    readOnly: true
+
+# Add the --metrics-cert-path argument for the metrics server
+- op: add
+  path: /spec/template/spec/containers/0/args/-
+  value: --metrics-cert-path=/tmp/k8s-metrics-server/metrics-certs
+
+# Add the metrics-server certs volume configuration
+- op: add
+  path: /spec/template/spec/volumes/-
+  value:
+    name: metrics-certs
+    secret:
+      secretName: metrics-server-cert
+      optional: false
+      items:
+        - key: ca.crt
+          path: ca.crt
+        - key: tls.crt
+          path: tls.crt
+        - key: tls.key
+          path: tls.key
diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml
new file mode 100644
index 000000000..db3e47347
--- /dev/null
+++ b/config/default/kustomization.yaml
@@ -0,0 +1,234 @@
+# Adds namespace to all resources.
+namespace: production-stack-system
+
+# Value of this field is prepended to the
+# names of all resources, e.g. a deployment named
+# "wordpress" becomes "alices-wordpress".
+# Note that it should also match with the prefix (text before '-') of the namespace
+# field above.
+namePrefix: production-stack-
+
+# Labels to add to all resources and selectors.
+#labels:
+#- includeSelectors: true
+#  pairs:
+#    someName: someValue
+
+resources:
+- ../crd
+- ../rbac
+- ../manager
+# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in
+# crd/kustomization.yaml
+#- ../webhook
+# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required.
+#- ../certmanager
+# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'.
+#- ../prometheus
+# [METRICS] Expose the controller manager metrics service.
+- metrics_service.yaml
+# [NETWORK POLICY] Protect the /metrics endpoint and Webhook Server with NetworkPolicy.
+# Only Pod(s) running in a namespace labeled with 'metrics: enabled' will be able to gather the metrics.
+# Only CR(s) which require webhooks and are applied in namespaces labeled with 'webhooks: enabled' will
+# be able to communicate with the Webhook Server.
+#- ../network-policy
+
+# Patches applied on top of the resources above; the [METRICS] patch is enabled by default.
+patches:
+# [METRICS] The following patch will enable the metrics endpoint using HTTPS and the port :8443.
+# More info: https://book.kubebuilder.io/reference/metrics
+- path: manager_metrics_patch.yaml
+  target:
+    kind: Deployment
+
+# Uncomment the patches line if you enable Metrics and CertManager
+# [METRICS-WITH-CERTS] To enable metrics protected with certManager, uncomment the following line.
+# This patch will protect the metrics with certManager self-signed certs.
+#- path: cert_metrics_manager_patch.yaml
+#  target:
+#    kind: Deployment
+
+# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in
+# crd/kustomization.yaml
+#- path: manager_webhook_patch.yaml
+#  target:
+#    kind: Deployment
+
+# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix.
+# Uncomment the following replacements to add the cert-manager CA injection annotations
+#replacements:
+# - source: # Uncomment the following block to enable certificates for metrics
+#     kind: Service
+#     version: v1
+#     name: controller-manager-metrics-service
+#     fieldPath: metadata.name
+#   targets:
+#     - select:
+#         kind: Certificate
+#         group: cert-manager.io
+#         version: v1
+#         name: metrics-certs
+#       fieldPaths:
+#         - spec.dnsNames.0
+#         - spec.dnsNames.1
+#       options:
+#         delimiter: '.'
+#         index: 0
+#         create: true
+#     - select: # Uncomment the following to set the Service name for TLS config in Prometheus ServiceMonitor
+#         kind: ServiceMonitor
+#         group: monitoring.coreos.com
+#         version: v1
+#         name: controller-manager-metrics-monitor
+#       fieldPaths:
+#         - spec.endpoints.0.tlsConfig.serverName
+#       options:
+#         delimiter: '.'
+#         index: 0
+#         create: true
+#
+# - source:
+#     kind: Service
+#     version: v1
+#     name: controller-manager-metrics-service
+#     fieldPath: metadata.namespace
+#   targets:
+#     - select:
+#         kind: Certificate
+#         group: cert-manager.io
+#         version: v1
+#         name: metrics-certs
+#       fieldPaths:
+#         - spec.dnsNames.0
+#         - spec.dnsNames.1
+#       options:
+#         delimiter: '.'
+#         index: 1
+#         create: true
+#     - select: # Uncomment the following to set the Service namespace for TLS in Prometheus ServiceMonitor
+#         kind: ServiceMonitor
+#         group: monitoring.coreos.com
+#         version: v1
+#         name: controller-manager-metrics-monitor
+#       fieldPaths:
+#         - spec.endpoints.0.tlsConfig.serverName
+#       options:
+#         delimiter: '.'
+#         index: 1
+#         create: true
+#
+# - source: # Uncomment the following block if you have any webhook
+#     kind: Service
+#     version: v1
+#     name: webhook-service
+#     fieldPath: .metadata.name # Name of the service
+#   targets:
+#     - select:
+#         kind: Certificate
+#         group: cert-manager.io
+#         version: v1
+#         name: serving-cert
+#       fieldPaths:
+#         - .spec.dnsNames.0
+#         - .spec.dnsNames.1
+#       options:
+#         delimiter: '.'
+#         index: 0
+#         create: true
+# - source:
+#     kind: Service
+#     version: v1
+#     name: webhook-service
+#     fieldPath: .metadata.namespace # Namespace of the service
+#   targets:
+#     - select:
+#         kind: Certificate
+#         group: cert-manager.io
+#         version: v1
+#         name: serving-cert
+#       fieldPaths:
+#         - .spec.dnsNames.0
+#         - .spec.dnsNames.1
+#       options:
+#         delimiter: '.'
+#         index: 1
+#         create: true
+#
+# - source: # Uncomment the following block if you have a ValidatingWebhook (--programmatic-validation)
+#     kind: Certificate
+#     group: cert-manager.io
+#     version: v1
+#     name: serving-cert # This name should match the one in certificate.yaml
+#     fieldPath: .metadata.namespace # Namespace of the certificate CR
+#   targets:
+#     - select:
+#         kind: ValidatingWebhookConfiguration
+#       fieldPaths:
+#         - .metadata.annotations.[cert-manager.io/inject-ca-from]
+#       options:
+#         delimiter: '/'
+#         index: 0
+#         create: true
+# - source:
+#     kind: Certificate
+#     group: cert-manager.io
+#     version: v1
+#     name: serving-cert
+#     fieldPath: .metadata.name
+#   targets:
+#     - select:
+#         kind: ValidatingWebhookConfiguration
+#       fieldPaths:
+#         - .metadata.annotations.[cert-manager.io/inject-ca-from]
+#       options:
+#         delimiter: '/'
+#         index: 1
+#         create: true
+#
+# - source: # Uncomment the following block if you have a DefaultingWebhook (--defaulting)
+#     kind: Certificate
+#     group: cert-manager.io
+#     version: v1
+#     name: serving-cert
+#     fieldPath: .metadata.namespace # Namespace of the certificate CR
+#   targets:
+#     - select:
+#         kind: MutatingWebhookConfiguration
+#       fieldPaths:
+#         - .metadata.annotations.[cert-manager.io/inject-ca-from]
+#       options:
+#         delimiter: '/'
+#         index: 0
+#         create: true
+# - source:
+#     kind: Certificate
+#     group: cert-manager.io
+#     version: v1
+#     name: serving-cert
+#     fieldPath: .metadata.name
+#   targets:
+#     - select:
+#         kind: MutatingWebhookConfiguration
+#       fieldPaths:
+#         - .metadata.annotations.[cert-manager.io/inject-ca-from]
+#       options:
+#         delimiter: '/'
+#         index: 1
+#         create: true
+#
+# - source: # Uncomment the following block if you have a ConversionWebhook (--conversion)
+#     kind: Certificate
+#     group: cert-manager.io
+#     version: v1
+#     name: serving-cert
+#     fieldPath: .metadata.namespace # Namespace of the certificate CR
+#   targets: # Do not remove or uncomment the following scaffold marker; required to generate code for target CRD.
+# +kubebuilder:scaffold:crdkustomizecainjectionns
+# - source:
+#     kind: Certificate
+#     group: cert-manager.io
+#     version: v1
+#     name: serving-cert
+#     fieldPath: .metadata.name
+#   targets: # Do not remove or uncomment the following scaffold marker; required to generate code for target CRD.
+# +kubebuilder:scaffold:crdkustomizecainjectionname
diff --git a/config/default/manager_metrics_patch.yaml b/config/default/manager_metrics_patch.yaml
new file mode 100644
index 000000000..2aaef6536
--- /dev/null
+++ b/config/default/manager_metrics_patch.yaml
@@ -0,0 +1,4 @@
+# This patch adds the args to allow exposing the metrics endpoint using HTTPS
+- op: add
+  path: /spec/template/spec/containers/0/args/0  # JSON Patch "add" at index 0: inserted ahead of the existing args of the first container
+  value: --metrics-bind-address=:8443  # same port as targetPort 8443 in metrics_service.yaml
diff --git a/config/default/metrics_service.yaml b/config/default/metrics_service.yaml
new file mode 100644
index 000000000..31e49eaef
--- /dev/null
+++ b/config/default/metrics_service.yaml
@@ -0,0 +1,18 @@
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    control-plane: controller-manager
+    app.kubernetes.io/name: production-stack
+    app.kubernetes.io/managed-by: kustomize
+  name: controller-manager-metrics-service
+  namespace: system
+spec:
+  ports:
+  - name: https  # metrics endpoint bound via --metrics-bind-address=:8443 (manager_metrics_patch.yaml)
+    port: 8443
+    protocol: TCP
+    targetPort: 8443
+  selector:  # must be a subset of the manager pod template labels, otherwise the Service gets no endpoints
+    app.kubernetes.io/name: production-stack
+    app.kubernetes.io/component: manager
diff --git a/config/manager/deployment.yaml b/config/manager/deployment.yaml
new file mode 100644
index 000000000..515739858
--- /dev/null
+++ b/config/manager/deployment.yaml
@@ -0,0 +1,94 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: production-stack-controller-manager
+  namespace: production-stack-system
+  labels:
+    app.kubernetes.io/name: production-stack
+    app.kubernetes.io/instance: production-stack
+    app.kubernetes.io/component: manager
+    app.kubernetes.io/created-by: production-stack
+    app.kubernetes.io/part-of: production-stack
+    app.kubernetes.io/managed-by: kustomize
+spec:
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: production-stack
+      app.kubernetes.io/instance: production-stack
+      app.kubernetes.io/component: manager
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: production-stack
+        app.kubernetes.io/instance: production-stack
+        app.kubernetes.io/component: manager
+        # Matched by the metrics Service and NetworkPolicy pod selectors.
+        control-plane: controller-manager
+    spec:
+      # TODO(user): Uncomment the following code to configure the nodeAffinity expression
+      # according to the platforms which are supported by your solution.
+      # It is considered best practice to support multiple architectures. You can
+      # build your manager image using the makefile target docker-buildx.
+      # affinity:
+      #   nodeAffinity:
+      #     requiredDuringSchedulingIgnoredDuringExecution:
+      #       nodeSelectorTerms:
+      #         - matchExpressions:
+      #           - key: kubernetes.io/arch
+      #             operator: In
+      #             values:
+      #               - amd64
+      #               - arm64
+      #               - ppc64le
+      #               - s390x
+      #           - key: kubernetes.io/os
+      #             operator: In
+      #             values:
+      #               - linux
+      securityContext:
+        # Projects are configured by default to adhere to the "restricted" Pod Security Standards.
+        # This ensures that deployments meet the highest security requirements for Kubernetes.
+        # For more details, see: https://kubernetes.io/docs/concepts/security/pod-security-standards/#restricted
+        runAsNonRoot: true
+        seccompProfile:
+          type: RuntimeDefault
+      containers:
+      - command:
+        - /manager
+        args:
+          - --leader-elect
+          - --health-probe-bind-address=:8081
+        image: controller:latest
+        imagePullPolicy: Always
+        name: manager
+        ports: []
+        securityContext:
+          allowPrivilegeEscalation: false
+          capabilities:
+            drop:
+            - "ALL"
+        livenessProbe:
+          httpGet:
+            path: /healthz
+            port: 8081
+          initialDelaySeconds: 15
+          periodSeconds: 20
+        readinessProbe:
+          httpGet:
+            path: /readyz
+            port: 8081
+          initialDelaySeconds: 5
+          periodSeconds: 10
+        # TODO(user): Configure the resources accordingly based on the project requirements.
+        # More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+        resources:
+          limits:
+            cpu: 500m
+            memory: 128Mi
+          requests:
+            cpu: 10m
+            memory: 64Mi
+        volumeMounts: []
+      volumes: []
+      serviceAccountName: production-stack-controller-manager
+      terminationGracePeriodSeconds: 10
diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml
new file mode 100644
index 000000000..f3af0a933
--- /dev/null
+++ b/config/manager/kustomization.yaml
@@ -0,0 +1,9 @@
+resources:
+- namespace.yaml
+- deployment.yaml
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+images:
+- name: controller
+  newName: controller
+  newTag: latest
diff --git a/config/manager/namespace.yaml b/config/manager/namespace.yaml
new file mode 100644
index 000000000..8084f7071
--- /dev/null
+++ b/config/manager/namespace.yaml
@@ -0,0 +1,8 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  labels:
+    control-plane: controller-manager
+    app.kubernetes.io/name: production-stack
+    app.kubernetes.io/managed-by: kustomize
+  name: system
diff --git a/config/network-policy/allow-metrics-traffic.yaml b/config/network-policy/allow-metrics-traffic.yaml
new file mode 100644
index 000000000..034723af1
--- /dev/null
+++ b/config/network-policy/allow-metrics-traffic.yaml
@@ -0,0 +1,27 @@
+# This NetworkPolicy allows ingress traffic
+# from Pods running in namespaces labeled with 'metrics: enabled'. Only Pods in those
+# namespaces are able to gather data from the metrics endpoint.
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  labels:
+    app.kubernetes.io/name: production-stack
+    app.kubernetes.io/managed-by: kustomize
+  name: allow-metrics-traffic
+  namespace: system
+spec:
+  podSelector:
+    matchLabels:
+      control-plane: controller-manager
+      app.kubernetes.io/name: production-stack
+  policyTypes:
+    - Ingress
+  ingress:
+    # This allows ingress traffic from any namespace with the label metrics: enabled
+    - from:
+      - namespaceSelector:
+          matchLabels:
+            metrics: enabled  # Only from namespaces with this label
+      ports:
+        - port: 8443
+          protocol: TCP
diff --git a/config/network-policy/kustomization.yaml b/config/network-policy/kustomization.yaml
new file mode 100644
index 000000000..ec0fb5e57
--- /dev/null
+++ b/config/network-policy/kustomization.yaml
@@ -0,0 +1,2 @@
+resources:
+- allow-metrics-traffic.yaml
diff --git a/config/prometheus/kustomization.yaml b/config/prometheus/kustomization.yaml
new file mode 100644
index 000000000..fdc5481b1
--- /dev/null
+++ b/config/prometheus/kustomization.yaml
@@ -0,0 +1,11 @@
+resources:
+- monitor.yaml
+
+# [PROMETHEUS-WITH-CERTS] The following patch configures the ServiceMonitor in ../prometheus
+# to securely reference certificates created and managed by cert-manager.
+# Additionally, ensure that you uncomment the [METRICS WITH CERTMANAGER] patch under config/default/kustomization.yaml
+# to mount the "metrics-server-cert" secret in the Manager Deployment.
+#patches:
+#  - path: monitor_tls_patch.yaml
+#    target:
+#      kind: ServiceMonitor
diff --git a/config/prometheus/monitor.yaml b/config/prometheus/monitor.yaml
new file mode 100644
index 000000000..e5272d7b8
--- /dev/null
+++ b/config/prometheus/monitor.yaml
@@ -0,0 +1,27 @@
+# Prometheus Monitor Service (Metrics)
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  labels:
+    control-plane: controller-manager
+    app.kubernetes.io/name: production-stack
+    app.kubernetes.io/managed-by: kustomize
+  name: controller-manager-metrics-monitor
+  namespace: system
+spec:
+  endpoints:
+    - path: /metrics
+      port: https # Ensure this is the name of the port that exposes HTTPS metrics
+      scheme: https
+      bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
+      tlsConfig:
+        # TODO(user): The option insecureSkipVerify: true is not recommended for production since it disables
+        # certificate verification, exposing the system to potential man-in-the-middle attacks.
+        # For production environments, it is recommended to use cert-manager for automatic TLS certificate management.
+        # To apply this configuration, enable cert-manager and use the patch located at config/prometheus/monitor_tls_patch.yaml,
+        # which securely references the certificate from the 'metrics-server-cert' secret.
+        insecureSkipVerify: true
+  selector:
+    matchLabels:
+      control-plane: controller-manager
+      app.kubernetes.io/name: production-stack
diff --git a/config/prometheus/monitor_tls_patch.yaml b/config/prometheus/monitor_tls_patch.yaml
new file mode 100644
index 000000000..5bf84ce0d
--- /dev/null
+++ b/config/prometheus/monitor_tls_patch.yaml
@@ -0,0 +1,19 @@
+# Patch for Prometheus ServiceMonitor to enable secure TLS configuration
+# using certificates managed by cert-manager
+- op: replace
+  path: /spec/endpoints/0/tlsConfig
+  value:
+    # SERVICE_NAME and SERVICE_NAMESPACE will be substituted by kustomize
+    serverName: SERVICE_NAME.SERVICE_NAMESPACE.svc
+    insecureSkipVerify: false
+    ca:
+      secret:
+        name: metrics-server-cert
+        key: ca.crt
+    cert:
+      secret:
+        name: metrics-server-cert
+        key: tls.crt
+    keySecret:
+      name: metrics-server-cert
+      key: tls.key
diff --git a/config/rbac/kustomization.yaml b/config/rbac/kustomization.yaml
new file mode 100644
index 000000000..7d46d2f99
--- /dev/null
+++ b/config/rbac/kustomization.yaml
@@ -0,0 +1,30 @@
+resources:
+# All RBAC will be applied under this service account in
+# the deployment namespace. You may comment out this resource
+# if your manager will use a service account that exists at
+# runtime. Be sure to update RoleBinding and ClusterRoleBinding
+# subjects if changing service account names.
+- service_account.yaml
+- role.yaml
+- role_binding.yaml
+- leader_election_role.yaml
+- leader_election_role_binding.yaml
+# The following RBAC configurations are used to protect
+# the metrics endpoint with authn/authz. These configurations
+# ensure that only authorized users and service accounts
+# can access the metrics endpoint. Comment the following
+# permissions if you want to disable this protection.
+# More info: https://book.kubebuilder.io/reference/metrics.html
+- metrics_auth_role.yaml
+- metrics_auth_role_binding.yaml
+- metrics_reader_role.yaml
+# For each CRD, "Admin", "Editor" and "Viewer" roles are scaffolded by
+# default, aiding admins in cluster management. Those roles are
+# not used by the production-stack project itself. You can comment out the following lines
+# if you do not want those helpers to be installed with your project.
+- vllmrouter_admin_role.yaml
+- vllmrouter_editor_role.yaml
+- vllmrouter_viewer_role.yaml
+- vllmruntime_admin_role.yaml
+- vllmruntime_editor_role.yaml
+- vllmruntime_viewer_role.yaml
diff --git a/config/rbac/leader_election_role.yaml b/config/rbac/leader_election_role.yaml
new file mode 100644
index 000000000..acffe969c
--- /dev/null
+++ b/config/rbac/leader_election_role.yaml
@@ -0,0 +1,40 @@
+# permissions to do leader election.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  labels:
+    app.kubernetes.io/name: production-stack
+    app.kubernetes.io/managed-by: kustomize
+  name: leader-election-role
+rules:
+- apiGroups:
+  - ""
+  resources:
+  - configmaps
+  verbs:
+  - get
+  - list
+  - watch
+  - create
+  - update
+  - patch
+  - delete
+- apiGroups:
+  - coordination.k8s.io
+  resources:
+  - leases
+  verbs:
+  - get
+  - list
+  - watch
+  - create
+  - update
+  - patch
+  - delete
+- apiGroups:
+  - ""
+  resources:
+  - events
+  verbs:
+  - create
+  - patch
diff --git a/config/rbac/leader_election_role_binding.yaml b/config/rbac/leader_election_role_binding.yaml
new file mode 100644
index 000000000..ec33def98
--- /dev/null
+++ b/config/rbac/leader_election_role_binding.yaml
@@ -0,0 +1,15 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  labels:
+    app.kubernetes.io/name: production-stack
+    app.kubernetes.io/managed-by: kustomize
+  name: leader-election-rolebinding
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: leader-election-role
+subjects:
+- kind: ServiceAccount
+  name: controller-manager
+  namespace: system
diff --git a/config/rbac/metrics_auth_role.yaml b/config/rbac/metrics_auth_role.yaml
new file mode 100644
index 000000000..32d2e4ec6
--- /dev/null
+++ b/config/rbac/metrics_auth_role.yaml
@@ -0,0 +1,17 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: metrics-auth-role
+rules:
+- apiGroups:
+  - authentication.k8s.io
+  resources:
+  - tokenreviews
+  verbs:
+  - create
+- apiGroups:
+  - authorization.k8s.io
+  resources:
+  - subjectaccessreviews
+  verbs:
+  - create
diff --git a/config/rbac/metrics_auth_role_binding.yaml b/config/rbac/metrics_auth_role_binding.yaml
new file mode 100644
index 000000000..e775d67ff
--- /dev/null
+++ b/config/rbac/metrics_auth_role_binding.yaml
@@ -0,0 +1,12 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: metrics-auth-rolebinding
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: metrics-auth-role
+subjects:
+- kind: ServiceAccount
+  name: controller-manager
+  namespace: system
diff --git a/config/rbac/metrics_reader_role.yaml b/config/rbac/metrics_reader_role.yaml
new file mode 100644
index 000000000..51a75db47
--- /dev/null
+++ b/config/rbac/metrics_reader_role.yaml
@@ -0,0 +1,9 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: metrics-reader
+rules:
+- nonResourceURLs:
+  - "/metrics"
+  verbs:
+  - get
diff --git a/config/rbac/pod_viewer_role.yaml b/config/rbac/pod_viewer_role.yaml
new file mode 100644
index 000000000..b94a22369
--- /dev/null
+++ b/config/rbac/pod_viewer_role.yaml
@@ -0,0 +1,17 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: pod-viewer-role
+  namespace: default
+  labels:
+    app.kubernetes.io/name: production-stack
+    app.kubernetes.io/managed-by: kustomize
+rules:
+- apiGroups:
+  - ""
+  resources:
+  - pods
+  verbs:
+  - get
+  - list
+  - watch
diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml
new file mode 100644
index 000000000..7109b302c
--- /dev/null
+++ b/config/rbac/role.yaml
@@ -0,0 +1,95 @@
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: manager-role
+rules:
+- apiGroups:
+  - ""
+  resources:
+  - configmaps
+  - secrets
+  - services
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - ""
+  resources:
+  - pods
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - apps
+  resources:
+  - deployments
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - staticroutes
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - staticroutes/finalizers
+  verbs:
+  - update
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - staticroutes/status
+  verbs:
+  - get
+  - patch
+  - update
+- apiGroups:
+  - serving.vllm.ai
+  resources:
+  - vllmrouters
+  - vllmruntimes
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - serving.vllm.ai
+  resources:
+  - vllmrouters/finalizers
+  - vllmruntimes/finalizers
+  verbs:
+  - update
+- apiGroups:
+  - serving.vllm.ai
+  resources:
+  - vllmrouters/status
+  - vllmruntimes/status
+  verbs:
+  - get
+  - patch
+  - update
diff --git a/config/rbac/role_binding.yaml b/config/rbac/role_binding.yaml
new file mode 100644
index 000000000..b61dbe83f
--- /dev/null
+++ b/config/rbac/role_binding.yaml
@@ -0,0 +1,15 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  labels:
+    app.kubernetes.io/name: production-stack
+    app.kubernetes.io/managed-by: kustomize
+  name: manager-rolebinding
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: manager-role
+subjects:
+- kind: ServiceAccount
+  name: controller-manager
+  namespace: system
diff --git a/config/rbac/service_account.yaml b/config/rbac/service_account.yaml
new file mode 100644
index 000000000..8e3b2f377
--- /dev/null
+++ b/config/rbac/service_account.yaml
@@ -0,0 +1,8 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  labels:
+    app.kubernetes.io/name: production-stack
+    app.kubernetes.io/managed-by: kustomize
+  name: controller-manager
+  namespace: system
diff --git a/config/rbac/vllmrouter_admin_role.yaml b/config/rbac/vllmrouter_admin_role.yaml
new file mode 100644
index 000000000..a42914d6b
--- /dev/null
+++ b/config/rbac/vllmrouter_admin_role.yaml
@@ -0,0 +1,27 @@
+# This rule is not used by the project production-stack itself.
+# It is provided to allow the cluster admin to help manage permissions for users.
+#
+# Grants full permissions ('*') over serving.vllm.ai.
+# This role is intended for users authorized to modify roles and bindings within the cluster,
+# enabling them to delegate specific permissions to other users or groups as needed.
+
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/name: production-stack
+    app.kubernetes.io/managed-by: kustomize
+  name: vllmrouter-admin-role
+rules:
+- apiGroups:
+  - serving.vllm.ai
+  resources:
+  - vllmrouters
+  verbs:
+  - '*'
+- apiGroups:
+  - serving.vllm.ai
+  resources:
+  - vllmrouters/status
+  verbs:
+  - get
diff --git a/config/rbac/vllmrouter_editor_role.yaml b/config/rbac/vllmrouter_editor_role.yaml
new file mode 100644
index 000000000..0fbf83a34
--- /dev/null
+++ b/config/rbac/vllmrouter_editor_role.yaml
@@ -0,0 +1,33 @@
+# This rule is not used by the project production-stack itself.
+# It is provided to allow the cluster admin to help manage permissions for users.
+#
+# Grants permissions to create, update, and delete resources within the serving.vllm.ai API group.
+# This role is intended for users who need to manage these resources
+# but should not control RBAC or manage permissions for others.
+
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/name: production-stack
+    app.kubernetes.io/managed-by: kustomize
+  name: vllmrouter-editor-role
+rules:
+- apiGroups:
+  - serving.vllm.ai
+  resources:
+  - vllmrouters
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - serving.vllm.ai
+  resources:
+  - vllmrouters/status
+  verbs:
+  - get
diff --git a/config/rbac/vllmrouter_role_binding.yaml b/config/rbac/vllmrouter_role_binding.yaml
new file mode 100644
index 000000000..a29c577c8
--- /dev/null
+++ b/config/rbac/vllmrouter_role_binding.yaml
@@ -0,0 +1,16 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: pod-viewer-binding
+  namespace: default
+  labels:
+    app.kubernetes.io/name: production-stack
+    app.kubernetes.io/managed-by: kustomize
+subjects:
+- kind: ServiceAccount
+  name: vllmrouter-sa
+  namespace: default
+roleRef:
+  kind: Role
+  name: pod-viewer-role
+  apiGroup: rbac.authorization.k8s.io
diff --git a/config/rbac/vllmrouter_service_account.yaml b/config/rbac/vllmrouter_service_account.yaml
new file mode 100644
index 000000000..4bb14d72b
--- /dev/null
+++ b/config/rbac/vllmrouter_service_account.yaml
@@ -0,0 +1,8 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: vllmrouter-sa
+  namespace: default
+  labels:
+    app.kubernetes.io/name: production-stack
+    app.kubernetes.io/managed-by: kustomize
diff --git a/config/rbac/vllmrouter_viewer_role.yaml b/config/rbac/vllmrouter_viewer_role.yaml
new file mode 100644
index 000000000..5da0be3d5
--- /dev/null
+++ b/config/rbac/vllmrouter_viewer_role.yaml
@@ -0,0 +1,29 @@
+# This rule is not used by the project production-stack itself.
+# It is provided to allow the cluster admin to help manage permissions for users.
+#
+# Grants read-only access to serving.vllm.ai resources.
+# This role is intended for users who need visibility into these resources
+# without permissions to modify them. It is ideal for monitoring purposes and limited-access viewing.
+
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/name: production-stack
+    app.kubernetes.io/managed-by: kustomize
+  name: vllmrouter-viewer-role
+rules:
+- apiGroups:
+  - serving.vllm.ai
+  resources:
+  - vllmrouters
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - serving.vllm.ai
+  resources:
+  - vllmrouters/status
+  verbs:
+  - get
diff --git a/config/rbac/vllmruntime_admin_role.yaml b/config/rbac/vllmruntime_admin_role.yaml
new file mode 100644
index 000000000..f765cb599
--- /dev/null
+++ b/config/rbac/vllmruntime_admin_role.yaml
@@ -0,0 +1,27 @@
+# This rule is not used by the project production-stack itself.
+# It is provided to allow the cluster admin to help manage permissions for users.
+#
+# Grants full permissions ('*') over serving.vllm.ai.
+# This role is intended for users authorized to modify roles and bindings within the cluster,
+# enabling them to delegate specific permissions to other users or groups as needed.
+
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/name: production-stack
+    app.kubernetes.io/managed-by: kustomize
+  name: vllmruntime-admin-role
+rules:
+- apiGroups:
+  - serving.vllm.ai
+  resources:
+  - vllmruntimes
+  verbs:
+  - '*'
+- apiGroups:
+  - serving.vllm.ai
+  resources:
+  - vllmruntimes/status
+  verbs:
+  - get
diff --git a/config/rbac/vllmruntime_editor_role.yaml b/config/rbac/vllmruntime_editor_role.yaml
new file mode 100644
index 000000000..c62691af5
--- /dev/null
+++ b/config/rbac/vllmruntime_editor_role.yaml
@@ -0,0 +1,33 @@
+# This rule is not used by the project production-stack itself.
+# It is provided to allow the cluster admin to help manage permissions for users.
+#
+# Grants permissions to create, update, and delete resources within the serving.vllm.ai API group.
+# This role is intended for users who need to manage these resources
+# but should not control RBAC or manage permissions for others.
+
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/name: production-stack
+    app.kubernetes.io/managed-by: kustomize
+  name: vllmruntime-editor-role
+rules:
+- apiGroups:
+  - serving.vllm.ai
+  resources:
+  - vllmruntimes
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - serving.vllm.ai
+  resources:
+  - vllmruntimes/status
+  verbs:
+  - get
diff --git a/config/rbac/vllmruntime_viewer_role.yaml b/config/rbac/vllmruntime_viewer_role.yaml
new file mode 100644
index 000000000..1314e4715
--- /dev/null
+++ b/config/rbac/vllmruntime_viewer_role.yaml
@@ -0,0 +1,29 @@
+# This rule is not used by the project production-stack itself.
+# It is provided to allow the cluster admin to help manage permissions for users.
+#
+# Grants read-only access to serving.vllm.ai resources.
+# This role is intended for users who need visibility into these resources
+# without permissions to modify them. It is ideal for monitoring purposes and limited-access viewing.
+
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/name: production-stack
+    app.kubernetes.io/managed-by: kustomize
+  name: vllmruntime-viewer-role
+rules:
+- apiGroups:
+  - serving.vllm.ai
+  resources:
+  - vllmruntimes
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - serving.vllm.ai
+  resources:
+  - vllmruntimes/status
+  verbs:
+  - get
diff --git a/config/samples/kustomization.yaml b/config/samples/kustomization.yaml
new file mode 100644
index 000000000..8a0f43239
--- /dev/null
+++ b/config/samples/kustomization.yaml
@@ -0,0 +1,8 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+## Append samples of your project ##
+resources:
+- serving_v1alpha1_vllmruntime.yaml
+- serving_v1alpha1_router.yaml
+# +kubebuilder:scaffold:manifestskustomizesamples
diff --git a/config/samples/serving_v1alpha1_router.yaml b/config/samples/serving_v1alpha1_router.yaml
new file mode 100644
index 000000000..3400dace4
--- /dev/null
+++ b/config/samples/serving_v1alpha1_router.yaml
@@ -0,0 +1,57 @@
+apiVersion: serving.vllm.ai/v1alpha1
+kind: VLLMRouter
+metadata:
+  labels:
+    app.kubernetes.io/name: production-stack
+    app.kubernetes.io/managed-by: kustomize
+  name: vllmrouter-sample
+spec:
+  # Enable the router deployment
+  enableRouter: true
+
+  # Number of router replicas
+  replicas: 1
+
+  # Service discovery method (k8s or static)
+  serviceDiscovery: k8s
+
+  # Routing strategy (roundrobin or session)
+  routingLogic: roundrobin
+
+  # Engine statistics collection interval
+  engineScrapeInterval: "30"
+
+  # Request statistics window
+  requestStatsWindow: "60"
+
+  # Container port for the router service
+  port: 80
+
+  # Service account name
+  serviceAccountName: vllmrouter-sa
+
+  # Image configuration
+  image:
+    registry: docker.io
+    name: lmcache/lmstack-router
+    pullPolicy: IfNotPresent
+
+  # Resource requirements
+  resources:
+    cpu: "2"
+    memory: "8Gi"
+
+  # Environment variables
+  env:
+    - name: LOG_LEVEL
+      value: "info"
+    - name: METRICS_ENABLED
+      value: "true"
+
+  # Node selector for pod scheduling
+  nodeSelectorTerms:
+    - matchExpressions:
+        - key: kubernetes.io/os
+          operator: In
+          values:
+            - linux
diff --git a/config/samples/serving_v1alpha1_vllmruntime.yaml b/config/samples/serving_v1alpha1_vllmruntime.yaml
new file mode 100644
index 000000000..3730c482f
--- /dev/null
+++ b/config/samples/serving_v1alpha1_vllmruntime.yaml
@@ -0,0 +1,63 @@
+apiVersion: serving.vllm.ai/v1alpha1
+kind: VLLMRuntime
+metadata:
+  labels:
+    app.kubernetes.io/name: production-stack
+    app.kubernetes.io/managed-by: kustomize
+  name: vllmruntime-sample
+spec:
+
+  # vLLM specific configurations
+  enableChunkedPrefill: false
+  enablePrefixCaching: false
+  tensorParallelSize: 1
+  gpuMemoryUtilization: "0.8"
+  maxLoras: 4
+  extraArgs: ["--disable-log-requests"]
+  v1: false
+
+  # LM Cache configuration
+  lmCacheConfig:
+    enabled: true
+    cpuOffloadingBufferSize: "4Gi"
+    diskOffloadingBufferSize: "8Gi"
+    remoteUrl: ""
+    remoteSerde: ""
+
+  # Model configuration
+  model:
+    modelURL: "meta-llama/Llama-3.1-8B"
+    enableLoRA: false
+    enableTool: false
+    toolCallParser: ""
+    maxModelLen: 4096
+    dtype: "bfloat16"
+    maxNumSeqs: 32
+
+  # Environment variables
+  env:
+    - name: HF_HOME
+      value: "/data"
+
+  # Resource requirements
+  resources:
+    cpu: "10"
+    memory: "32Gi"
+    gpu: "1"
+
+  # Image configuration
+  image:
+    registry: "docker.io"
+    name: "lmcache/vllm-openai:2025-04-18"
+    pullPolicy: "IfNotPresent"
+    pullSecretName: ""
+
+  # HuggingFace token secret (optional)
+  hfTokenSecret:
+    name: "huggingface-token"
+
+  # Number of replicas
+  replicas: 1
+
+  # Deployment strategy
+  deploymentStrategy: "Recreate"
diff --git a/go.mod b/go.mod
new file mode 100644
index 000000000..be7288dec
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,100 @@
+module production-stack
+
+go 1.24.0
+
+toolchain go1.24.2
+
+require (
+	github.com/onsi/ginkgo/v2 v2.23.4
+	github.com/onsi/gomega v1.37.0
+	k8s.io/apimachinery v0.33.0
+	k8s.io/client-go v0.33.0
+	sigs.k8s.io/controller-runtime v0.20.4
+)
+
+require (
+	cel.dev/expr v0.18.0 // indirect
+	github.com/antlr4-go/antlr/v4 v4.13.0 // indirect
+	github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a // indirect
+	github.com/beorn7/perks v1.0.1 // indirect
+	github.com/blang/semver/v4 v4.0.0 // indirect
+	github.com/cenkalti/backoff/v4 v4.3.0 // indirect
+	github.com/cespare/xxhash/v2 v2.3.0 // indirect
+	github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
+	github.com/emicklei/go-restful/v3 v3.11.0 // indirect
+	github.com/evanphx/json-patch/v5 v5.9.11 // indirect
+	github.com/felixge/httpsnoop v1.0.4 // indirect
+	github.com/fsnotify/fsnotify v1.7.0 // indirect
+	github.com/fxamacker/cbor/v2 v2.7.0 // indirect
+	github.com/go-logr/logr v1.4.2 // indirect
+	github.com/go-logr/stdr v1.2.2 // indirect
+	github.com/go-logr/zapr v1.3.0 // indirect
+	github.com/go-openapi/jsonpointer v0.21.0 // indirect
+	github.com/go-openapi/jsonreference v0.20.2 // indirect
+	github.com/go-openapi/swag v0.23.0 // indirect
+	github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
+	github.com/gogo/protobuf v1.3.2 // indirect
+	github.com/google/btree v1.1.3 // indirect
+	github.com/google/cel-go v0.22.0 // indirect
+	github.com/google/gnostic-models v0.6.9 // indirect
+	github.com/google/go-cmp v0.7.0 // indirect
+	github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect
+	github.com/google/uuid v1.6.0 // indirect
+	github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 // indirect
+	github.com/inconshreveable/mousetrap v1.1.0 // indirect
+	github.com/josharian/intern v1.0.0 // indirect
+	github.com/json-iterator/go v1.1.12 // indirect
+	github.com/mailru/easyjson v0.7.7 // indirect
+	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
+	github.com/modern-go/reflect2 v1.0.2 // indirect
+	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
+	github.com/pkg/errors v0.9.1 // indirect
+	github.com/prometheus/client_golang v1.19.1 // indirect
+	github.com/prometheus/client_model v0.6.1 // indirect
+	github.com/prometheus/common v0.55.0 // indirect
+	github.com/prometheus/procfs v0.15.1 // indirect
+	github.com/spf13/cobra v1.8.1 // indirect
+	github.com/spf13/pflag v1.0.5 // indirect
+	github.com/stoewer/go-strcase v1.3.0 // indirect
+	github.com/x448/float16 v0.8.4 // indirect
+	go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 // indirect
+	go.opentelemetry.io/otel v1.28.0 // indirect
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.28.0 // indirect
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.27.0 // indirect
+	go.opentelemetry.io/otel/metric v1.28.0 // indirect
+	go.opentelemetry.io/otel/sdk v1.28.0 // indirect
+	go.opentelemetry.io/otel/trace v1.28.0 // indirect
+	go.opentelemetry.io/proto/otlp v1.3.1 // indirect
+	go.uber.org/automaxprocs v1.6.0 // indirect
+	go.uber.org/multierr v1.11.0 // indirect
+	go.uber.org/zap v1.27.0 // indirect
+	golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect
+	golang.org/x/net v0.38.0 // indirect
+	golang.org/x/oauth2 v0.27.0 // indirect
+	golang.org/x/sync v0.12.0 // indirect
+	golang.org/x/sys v0.32.0 // indirect
+	golang.org/x/term v0.30.0 // indirect
+	golang.org/x/text v0.23.0 // indirect
+	golang.org/x/time v0.9.0 // indirect
+	golang.org/x/tools v0.31.0 // indirect
+	gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
+	google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7 // indirect
+	google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7 // indirect
+	google.golang.org/grpc v1.65.0 // indirect
+	google.golang.org/protobuf v1.36.5 // indirect
+	gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
+	gopkg.in/inf.v0 v0.9.1 // indirect
+	gopkg.in/yaml.v3 v3.0.1 // indirect
+	k8s.io/api v0.33.0 // indirect
+	k8s.io/apiextensions-apiserver v0.32.1 // indirect
+	k8s.io/apiserver v0.32.1 // indirect
+	k8s.io/component-base v0.32.1 // indirect
+	k8s.io/klog/v2 v2.130.1 // indirect
+	k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff // indirect
+	k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 // indirect
+	sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.0 // indirect
+	sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect
+	sigs.k8s.io/randfill v1.0.0 // indirect
+	sigs.k8s.io/structured-merge-diff/v4 v4.6.0 // indirect
+	sigs.k8s.io/yaml v1.4.0 // indirect
+)
diff --git a/go.sum b/go.sum
new file mode 100644
index 000000000..d4a9595dd
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,254 @@
+cel.dev/expr v0.18.0 h1:CJ6drgk+Hf96lkLikr4rFf19WrU0BOWEihyZnI2TAzo=
+cel.dev/expr v0.18.0/go.mod h1:MrpN08Q+lEBs+bGYdLxxHkZoUSsCp0nSKTs0nTymJgw=
+github.com/antlr4-go/antlr/v4 v4.13.0 h1:lxCg3LAv+EUK6t1i0y1V6/SLeUi0eKEKdhQAlS8TVTI=
+github.com/antlr4-go/antlr/v4 v4.13.0/go.mod h1:pfChB/xh/Unjila75QW7+VU4TSnWnnk9UTnmpPaOR2g=
+github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a h1:idn718Q4B6AGu/h5Sxe66HYVdqdGu2l9Iebqhi/AEoA=
+github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY=
+github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
+github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
+github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
+github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
+github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
+github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
+github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
+github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
+github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
+github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
+github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g=
+github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
+github.com/evanphx/json-patch v0.5.2 h1:xVCHIVMUu1wtM/VkR9jVZ45N3FhZfYMMYGorLCR8P3k=
+github.com/evanphx/json-patch v0.5.2/go.mod h1:ZWS5hhDbVDyob71nXKNL0+PWn6ToqBHMikGIFbs31qQ=
+github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU=
+github.com/evanphx/json-patch/v5 v5.9.11/go.mod h1:3j+LviiESTElxA4p3EMKAB9HXj3/XEtnUf6OZxqIQTM=
+github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
+github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
+github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
+github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
+github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E=
+github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ=
+github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
+github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
+github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
+github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
+github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
+github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ=
+github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg=
+github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs=
+github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ=
+github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY=
+github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE=
+github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k=
+github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14=
+github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE=
+github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ=
+github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
+github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
+github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
+github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
+github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg=
+github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4=
+github.com/google/cel-go v0.22.0 h1:b3FJZxpiv1vTMo2/5RDUqAHPxkT8mmMfJIrq1llbf7g=
+github.com/google/cel-go v0.22.0/go.mod h1:BuznPXXfQDpXKWQ9sPW3TzlAJN5zzFe+i9tIs0yC4s8=
+github.com/google/gnostic-models v0.6.9 h1:MU/8wDLif2qCXZmzncUQ/BOfxWfthHi63KqpoNbWqVw=
+github.com/google/gnostic-models v0.6.9/go.mod h1:CiWsm0s6BSQd1hRn8/QmxqB6BesYcbSZxsz9b0KuDBw=
+github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
+github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
+github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
+github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
+github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
+github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8=
+github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
+github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
+github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 h1:bkypFPDjIYGfCYD5mRBvpqxfYX1YCS1PXdKYWi8FsN0=
+github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0/go.mod h1:P+Lt/0by1T8bfcF3z737NnSbmxQAppXMRziHUxPOC8k=
+github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
+github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
+github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
+github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
+github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
+github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
+github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
+github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
+github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
+github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
+github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
+github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
+github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
+github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
+github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
+github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
+github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
+github.com/onsi/ginkgo/v2 v2.23.4 h1:ktYTpKJAVZnDT4VjxSbiBenUjmlL/5QkBEocaWXiQus=
+github.com/onsi/ginkgo/v2 v2.23.4/go.mod h1:Bt66ApGPBFzHyR+JO10Zbt0Gsp4uWxu5mIOTusL46e8=
+github.com/onsi/gomega v1.37.0 h1:CdEG8g0S133B4OswTDC/5XPSzE1OeP29QOioj2PID2Y=
+github.com/onsi/gomega v1.37.0/go.mod h1:8D9+Txp43QWKhM24yyOBEdpkzN8FvJyAwecBgsU4KU0=
+github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
+github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
+github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g=
+github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U=
+github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE=
+github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho=
+github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
+github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
+github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc=
+github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8=
+github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
+github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
+github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
+github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
+github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
+github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM=
+github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y=
+github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
+github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
+github.com/stoewer/go-strcase v1.3.0 h1:g0eASXYtp+yvN9fK8sH94oCIk0fau9uV1/ZdJ0AVEzs=
+github.com/stoewer/go-strcase v1.3.0/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8wodgtPmh1xo=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
+github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
+github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
+github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
+github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
+github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
+github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
+github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
+github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
+github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 h1:4K4tsIXefpVJtvA/8srF4V4y0akAoPHkIslgAkjixJA=
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0/go.mod h1:jjdQuTGVsXV4vSs+CJ2qYDeDPf9yIJV23qlIzBm73Vg=
+go.opentelemetry.io/otel v1.28.0 h1:/SqNcYk+idO0CxKEUOtKQClMK/MimZihKYMruSMViUo=
+go.opentelemetry.io/otel v1.28.0/go.mod h1:q68ijF8Fc8CnMHKyzqL6akLO46ePnjkgfIMIjUIX9z4=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.28.0 h1:3Q/xZUyC1BBkualc9ROb4G8qkH90LXEIICcs5zv1OYY=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.28.0/go.mod h1:s75jGIWA9OfCMzF0xr+ZgfrB5FEbbV7UuYo32ahUiFI=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.27.0 h1:qFffATk0X+HD+f1Z8lswGiOQYKHRlzfmdJm0wEaVrFA=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.27.0/go.mod h1:MOiCmryaYtc+V0Ei+Tx9o5S1ZjA7kzLucuVuyzBZloQ=
+go.opentelemetry.io/otel/metric v1.28.0 h1:f0HGvSl1KRAU1DLgLGFjrwVyismPlnuU6JD6bOeuA5Q=
+go.opentelemetry.io/otel/metric v1.28.0/go.mod h1:Fb1eVBFZmLVTMb6PPohq3TO9IIhUisDsbJoL/+uQW4s=
+go.opentelemetry.io/otel/sdk v1.28.0 h1:b9d7hIry8yZsgtbmM0DKyPWMMUMlK9NEKuIG4aBqWyE=
+go.opentelemetry.io/otel/sdk v1.28.0/go.mod h1:oYj7ClPUA7Iw3m+r7GeEjz0qckQRJK2B8zjcZEfu7Pg=
+go.opentelemetry.io/otel/trace v1.28.0 h1:GhQ9cUuQGmNDd5BTCP2dAvv75RdMxEfTmYejp+lkx9g=
+go.opentelemetry.io/otel/trace v1.28.0/go.mod h1:jPyXzNPg6da9+38HEwElrQiHlVMTnVfM3/yv2OlIHaI=
+go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0=
+go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8=
+go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs=
+go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8=
+go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
+go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
+go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
+go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
+go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8=
+go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8=
+golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY=
+golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
+golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8=
+golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
+golang.org/x/oauth2 v0.27.0 h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M=
+golang.org/x/oauth2 v0.27.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw=
+golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20=
+golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
+golang.org/x/term v0.30.0 h1:PQ39fJZ+mfadBm0y5WlL4vlM7Sx1Hgf13sMIY2+QS9Y=
+golang.org/x/term v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY=
+golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4=
+golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY=
+golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
+golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
+golang.org/x/tools v0.31.0 h1:0EedkvKDbh+qistFTd0Bcwe/YLh4vHwWEkiI0toFIBU=
+golang.org/x/tools v0.31.0/go.mod h1:naFTU+Cev749tSJRXJlna0T3WxKvb1kWEx15xA4SdmQ=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw=
+gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY=
+google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7 h1:YcyjlL1PRr2Q17/I0dPk2JmYS5CDXfcdb2Z3YRioEbw=
+google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7/go.mod h1:OCdP9MfskevB/rbYvHTsXTtKC+3bHWajPdoKgjcYkfo=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7 h1:2035KHhUv+EpyB+hWgJnaWKJOdX1E95w2S8Rr4uWKTs=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU=
+google.golang.org/grpc v1.65.0 h1:bs/cUb4lp1G5iImFFd3u5ixQzweKizoZJAwBNLR42lc=
+google.golang.org/grpc v1.65.0/go.mod h1:WgYC2ypjlB0EiQi6wdKixMqukr6lBc0Vo+oOgjrM5ZQ=
+google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM=
+google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
+gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4=
+gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M=
+gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
+gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+k8s.io/api v0.33.0 h1:yTgZVn1XEe6opVpP1FylmNrIFWuDqe2H0V8CT5gxfIU=
+k8s.io/api v0.33.0/go.mod h1:CTO61ECK/KU7haa3qq8sarQ0biLq2ju405IZAd9zsiM=
+k8s.io/apiextensions-apiserver v0.32.1 h1:hjkALhRUeCariC8DiVmb5jj0VjIc1N0DREP32+6UXZw=
+k8s.io/apiextensions-apiserver v0.32.1/go.mod h1:sxWIGuGiYov7Io1fAS2X06NjMIk5CbRHc2StSmbaQto=
+k8s.io/apimachinery v0.33.0 h1:1a6kHrJxb2hs4t8EE5wuR/WxKDwGN1FKH3JvDtA0CIQ=
+k8s.io/apimachinery v0.33.0/go.mod h1:BHW0YOu7n22fFv/JkYOEfkUYNRN0fj0BlvMFWA7b+SM=
+k8s.io/apiserver v0.32.1 h1:oo0OozRos66WFq87Zc5tclUX2r0mymoVHRq8JmR7Aak=
+k8s.io/apiserver v0.32.1/go.mod h1:UcB9tWjBY7aryeI5zAgzVJB/6k7E97bkr1RgqDz0jPw=
+k8s.io/client-go v0.33.0 h1:UASR0sAYVUzs2kYuKn/ZakZlcs2bEHaizrrHUZg0G98=
+k8s.io/client-go v0.33.0/go.mod h1:kGkd+l/gNGg8GYWAPr0xF1rRKvVWvzh9vmZAMXtaKOg=
+k8s.io/component-base v0.32.1 h1:/5IfJ0dHIKBWysGV0yKTFfacZ5yNV1sulPh3ilJjRZk=
+k8s.io/component-base v0.32.1/go.mod h1:j1iMMHi/sqAHeG5z+O9BFNCF698a1u0186zkjMZQ28w=
+k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
+k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
+k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff h1:/usPimJzUKKu+m+TE36gUyGcf03XZEP0ZIKgKj35LS4=
+k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff/go.mod h1:5jIi+8yX4RIb8wk3XwBo5Pq2ccx4FP10ohkbSKCZoK8=
+k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 h1:M3sRQVHv7vB20Xc2ybTt7ODCeFj6JSWYFzOFnYeS6Ro=
+k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
+sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.0 h1:CPT0ExVicCzcpeN4baWEV2ko2Z/AsiZgEdwgcfwLgMo=
+sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.0/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw=
+sigs.k8s.io/controller-runtime v0.20.4 h1:X3c+Odnxz+iPTRobG4tp092+CvBU9UK0t/bRf+n0DGU=
+sigs.k8s.io/controller-runtime v0.20.4/go.mod h1:xg2XB0K5ShQzAgsoujxuKN4LNXR2LfwwHsPj7Iaw+XY=
+sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 h1:/Rv+M11QRah1itp8VhT6HoVx1Ray9eB4DBr+K+/sCJ8=
+sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3/go.mod h1:18nIHnGi6636UCz6m8i4DhaJ65T6EruyzmoQqI2BVDo=
+sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
+sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU=
+sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
+sigs.k8s.io/structured-merge-diff/v4 v4.6.0 h1:IUA9nvMmnKWcj5jl84xn+T5MnlZKThmUW1TdblaLVAc=
+sigs.k8s.io/structured-merge-diff/v4 v4.6.0/go.mod h1:dDy58f92j70zLsuZVuUX5Wp9vtxXpaZnkPGWeqDfCps=
+sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
+sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY=
diff --git a/hack/boilerplate.go.txt b/hack/boilerplate.go.txt
new file mode 100644
index 000000000..4671de8fb
--- /dev/null
+++ b/hack/boilerplate.go.txt
@@ -0,0 +1,15 @@
+/*
+Copyright 2025.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go
new file mode 100644
index 000000000..13578d9d9
--- /dev/null
+++ b/internal/controller/suite_test.go
@@ -0,0 +1,88 @@
+/*
+Copyright 2025.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package controller
+
+import (
+	"fmt"
+	"path/filepath"
+	"runtime"
+	"testing"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+
+	"k8s.io/client-go/kubernetes/scheme"
+	"k8s.io/client-go/rest"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/envtest"
+	logf "sigs.k8s.io/controller-runtime/pkg/log"
+	"sigs.k8s.io/controller-runtime/pkg/log/zap"
+
+	servingv1alpha1 "production-stack/api/v1alpha1"
+	// +kubebuilder:scaffold:imports
+)
+
+// These tests use Ginkgo (BDD-style Go testing framework). Refer to
+// https://onsi.github.io/ginkgo/ to learn more about Ginkgo.
+
+var cfg *rest.Config             // REST config returned by testEnv.Start() in BeforeSuite
+var k8sClient client.Client      // client built from cfg in BeforeSuite
+var testEnv *envtest.Environment // envtest environment created in BeforeSuite, stopped in AfterSuite
+
+func TestControllers(t *testing.T) { // go test entry point that runs the Ginkgo "Controller Suite"
+	RegisterFailHandler(Fail) // route Gomega assertion failures to Ginkgo's Fail
+
+	RunSpecs(t, "Controller Suite") // run the Ginkgo specs, reporting through t
+}
+
+var _ = BeforeSuite(func() { // one-time setup: start envtest, register the API types, build k8sClient
+	logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) // log through GinkgoWriter in dev mode
+
+	By("bootstrapping test environment")
+	testEnv = &envtest.Environment{
+		CRDDirectoryPaths:     []string{filepath.Join("..", "..", "config", "crd", "bases")},
+		ErrorIfCRDPathMissing: true,
+
+		// The BinaryAssetsDirectory is only required if you want to run the tests directly
+		// without calling the Makefile target test. If not set, envtest will look for the
+		// default path defined in controller-runtime which is /usr/local/kubebuilder/.
+		// Note that you must have the required binaries setup under the bin directory to run the tests directly.
+		BinaryAssetsDirectory: filepath.Join("..", "..", "bin", "k8s",
+			fmt.Sprintf("1.29.0-%s-%s", runtime.GOOS, runtime.GOARCH)), // NOTE(review): version is hard-coded; confirm it matches the envtest binaries the Makefile installs
+	}
+
+	var err error
+	// cfg is the package-level variable declared in this file; assign with = so it is not shadowed.
+	cfg, err = testEnv.Start()
+	Expect(err).NotTo(HaveOccurred())
+	Expect(cfg).NotTo(BeNil())
+
+	err = servingv1alpha1.AddToScheme(scheme.Scheme) // register the serving v1alpha1 types on the shared client-go scheme
+	Expect(err).NotTo(HaveOccurred())
+
+	// +kubebuilder:scaffold:scheme
+
+	k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) // client built from cfg with the scheme populated above
+	Expect(err).NotTo(HaveOccurred())
+	Expect(k8sClient).NotTo(BeNil())
+})
+
+var _ = AfterSuite(func() { // one-time teardown for the suite
+	By("tearing down the test environment")
+	err := testEnv.Stop() // stop the envtest environment created in BeforeSuite
+	Expect(err).NotTo(HaveOccurred())
+})
diff --git a/internal/controller/vllmrouter_controller.go b/internal/controller/vllmrouter_controller.go
new file mode 100644
index 000000000..002f9cddb
--- /dev/null
+++ b/internal/controller/vllmrouter_controller.go
@@ -0,0 +1,383 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package controller
+
+import (
+	"context"
+	"fmt"
+	"reflect"
+
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/api/resource"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/apimachinery/pkg/util/intstr"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/log"
+
+	servingv1alpha1 "production-stack/api/v1alpha1"
+)
+
+// VLLMRouterReconciler reconciles a VLLMRouter object into a Service and a Deployment of the same name.
+type VLLMRouterReconciler struct {
+	client.Client
+	Scheme *runtime.Scheme // handed to ctrl.SetControllerReference when child objects are built
+}
+
+// +kubebuilder:rbac:groups=serving.vllm.ai,resources=vllmrouters,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=serving.vllm.ai,resources=vllmrouters/status,verbs=get;update;patch
+// +kubebuilder:rbac:groups=serving.vllm.ai,resources=vllmrouters/finalizers,verbs=update
+// +kubebuilder:rbac:groups=serving.vllm.ai,resources=vllmruntimes,verbs=get;list;watch
+// +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=core,resources=services,verbs=get;list;watch;create;update;patch;delete
+
+// Reconcile makes sure the Service and Deployment named after the VLLMRouter exist, pushes spec
+// changes onto the live Deployment and records readiness in the status; each write requeues.
+func (r *VLLMRouterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+	log := log.FromContext(ctx)
+
+	// Fetch the VLLMRouter instance
+	router := &servingv1alpha1.VLLMRouter{}
+	err := r.Get(ctx, req.NamespacedName, router)
+	if err != nil {
+		if errors.IsNotFound(err) {
+			// Request object not found, could have been deleted after reconcile request.
+			// Return and don't requeue
+			log.Info("VLLMRouter resource not found. Ignoring since object must be deleted")
+			return ctrl.Result{}, nil
+		}
+		// Error reading the object - requeue the request.
+		log.Error(err, "Failed to get VLLMRouter")
+		return ctrl.Result{}, err
+	}
+
+	// Check if the service already exists, if not create a new one
+	foundService := &corev1.Service{}
+	err = r.Get(ctx, types.NamespacedName{Name: router.Name, Namespace: router.Namespace}, foundService)
+	if errors.IsNotFound(err) {
+		// Define a new service
+		svc := r.serviceForVLLMRouter(router)
+		log.Info("Creating a new Service", "Service.Namespace", svc.Namespace, "Service.Name", svc.Name)
+		err = r.Create(ctx, svc)
+		if err != nil {
+			log.Error(err, "Failed to create new Service", "Service.Namespace", svc.Namespace, "Service.Name", svc.Name)
+			return ctrl.Result{}, err
+		}
+		// Service created successfully - return and requeue
+		return ctrl.Result{Requeue: true}, nil
+	} else if err != nil {
+		log.Error(err, "Failed to get Service")
+		return ctrl.Result{}, err
+	}
+
+	// Check if the deployment already exists, if not create a new one
+	found := &appsv1.Deployment{}
+	err = r.Get(ctx, types.NamespacedName{Name: router.Name, Namespace: router.Namespace}, found)
+	if errors.IsNotFound(err) {
+		// Define a new deployment
+		dep := r.deploymentForVLLMRouter(router)
+		log.Info("Creating a new Deployment", "Deployment.Namespace", dep.Namespace, "Deployment.Name", dep.Name)
+		err = r.Create(ctx, dep)
+		if err != nil {
+			log.Error(err, "Failed to create new Deployment", "Deployment.Namespace", dep.Namespace, "Deployment.Name", dep.Name)
+			return ctrl.Result{}, err
+		}
+		// Deployment created successfully - return and requeue
+		return ctrl.Result{Requeue: true}, nil
+	} else if err != nil {
+		log.Error(err, "Failed to get Deployment")
+		return ctrl.Result{}, err
+	}
+
+	// Update the deployment if needed
+	if r.deploymentNeedsUpdate(found, router) {
+		log.Info("Updating Deployment", "Deployment.Namespace", found.Namespace, "Deployment.Name", found.Name)
+		// Replace only the spec of the live object: updating a freshly built Deployment would
+		// drop its resourceVersion and the metadata the API server and controllers have added.
+		found.Spec = r.deploymentForVLLMRouter(router).Spec
+		err = r.Update(ctx, found)
+		if err != nil {
+			log.Error(err, "Failed to update Deployment", "Deployment.Namespace", found.Namespace, "Deployment.Name", found.Name)
+			return ctrl.Result{}, err
+		}
+		// Deployment updated successfully - return and requeue
+		return ctrl.Result{Requeue: true}, nil
+	}
+
+	// Update the status
+	if err := r.updateStatus(ctx, router, found); err != nil {
+		log.Error(err, "Failed to update VLLMRouter status")
+		return ctrl.Result{}, err
+	}
+
+	return ctrl.Result{}, nil
+}
+
+// deploymentForVLLMRouter renders the Deployment for router: a single "router" container whose
+// args, env, resources and liveness probe come from the spec. It panics when a CPU/memory
+// quantity does not parse or when static service discovery lacks staticBackends/staticModels.
+func (r *VLLMRouterReconciler) deploymentForVLLMRouter(router *servingv1alpha1.VLLMRouter) *appsv1.Deployment {
+	labels := map[string]string{
+		"app": router.Name,
+	}
+
+	// Add user-defined environment variables (ranging over an unset list is a no-op)
+	env := []corev1.EnvVar{}
+	for _, e := range router.Spec.Env {
+		env = append(env, corev1.EnvVar{
+			Name:  e.Name,
+			Value: e.Value,
+		})
+	}
+
+	// Add VLLM API Key if specified
+	if router.Spec.VLLMApiKeySecret.Name != "" && router.Spec.VLLMApiKeyName != "" {
+		env = append(env, corev1.EnvVar{
+			Name: "VLLM_API_KEY",
+			ValueFrom: &corev1.EnvVarSource{
+				SecretKeyRef: &corev1.SecretKeySelector{
+					LocalObjectReference: router.Spec.VLLMApiKeySecret,
+					Key:                  router.Spec.VLLMApiKeyName,
+				},
+			},
+		})
+	}
+
+	// Build resource requirements
+	resources := corev1.ResourceRequirements{
+		Requests: corev1.ResourceList{},
+		Limits:   corev1.ResourceList{},
+	}
+
+	if router.Spec.Resources.CPU != "" {
+		resources.Requests[corev1.ResourceCPU] = resource.MustParse(router.Spec.Resources.CPU)
+		resources.Limits[corev1.ResourceCPU] = resource.MustParse(router.Spec.Resources.CPU)
+	}
+
+	if router.Spec.Resources.Memory != "" {
+		resources.Requests[corev1.ResourceMemory] = resource.MustParse(router.Spec.Resources.Memory)
+		resources.Limits[corev1.ResourceMemory] = resource.MustParse(router.Spec.Resources.Memory)
+	}
+
+	// Build the image reference as <registry>/<name>; no default is filled in here
+	image := router.Spec.Image.Registry + "/" + router.Spec.Image.Name
+
+	// Get the image pull policy
+	imagePullPolicy := corev1.PullIfNotPresent
+	if router.Spec.Image.PullPolicy != "" {
+		imagePullPolicy = corev1.PullPolicy(router.Spec.Image.PullPolicy)
+	}
+
+	// Build image pull secrets
+	var imagePullSecrets []corev1.LocalObjectReference
+	if router.Spec.Image.PullSecretName != "" {
+		imagePullSecrets = append(imagePullSecrets, corev1.LocalObjectReference{
+			Name: router.Spec.Image.PullSecretName,
+		})
+	}
+
+	// Build container args
+	args := []string{
+		"--host", "0.0.0.0",
+		"--port", fmt.Sprintf("%d", router.Spec.Port),
+		"--service-discovery", router.Spec.ServiceDiscovery,
+	}
+
+	// Add service discovery specific args
+	if router.Spec.ServiceDiscovery == "k8s" {
+		args = append(args,
+			"--k8s-namespace", router.Namespace,
+		)
+	} else if router.Spec.ServiceDiscovery == "static" {
+		if router.Spec.StaticBackends == "" || router.Spec.StaticModels == "" {
+			// This should be handled by validation webhook
+			panic("static service discovery requires both staticBackends and staticModels")
+		}
+		args = append(args,
+			"--static-backends", router.Spec.StaticBackends,
+			"--static-models", router.Spec.StaticModels,
+		)
+	}
+
+	// Add optional args
+	if router.Spec.RoutingLogic != "" {
+		args = append(args, "--routing-logic", router.Spec.RoutingLogic)
+	}
+	if router.Spec.SessionKey != "" {
+		args = append(args, "--session-key", router.Spec.SessionKey)
+	}
+	if router.Spec.EngineScrapeInterval != "" {
+		args = append(args, "--engine-stats-interval", router.Spec.EngineScrapeInterval)
+	}
+	if router.Spec.RequestStatsWindow != "" {
+		args = append(args, "--request-stats-window", router.Spec.RequestStatsWindow)
+	}
+	args = append(args, router.Spec.ExtraArgs...)
+
+	dep := &appsv1.Deployment{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      router.Name,
+			Namespace: router.Namespace,
+		},
+		Spec: appsv1.DeploymentSpec{
+			Replicas: &router.Spec.Replicas,
+			Selector: &metav1.LabelSelector{
+				MatchLabels: labels,
+			},
+			Template: corev1.PodTemplateSpec{
+				ObjectMeta: metav1.ObjectMeta{
+					Labels: labels,
+				},
+				Spec: corev1.PodSpec{
+					ServiceAccountName: router.Spec.ServiceAccountName,
+					ImagePullSecrets:   imagePullSecrets,
+					Containers: []corev1.Container{
+						{
+							Name:            "router",
+							Image:           image,
+							ImagePullPolicy: imagePullPolicy,
+							Args:            args,
+							Env:             env,
+							Ports: []corev1.ContainerPort{
+								{
+									Name:          "http",
+									ContainerPort: router.Spec.Port,
+								},
+							},
+							Resources: resources,
+							LivenessProbe: &corev1.Probe{
+								InitialDelaySeconds: 30,
+								PeriodSeconds:       5,
+								FailureThreshold:    3,
+								ProbeHandler: corev1.ProbeHandler{
+									HTTPGet: &corev1.HTTPGetAction{
+										Path: "/health",
+										Port: intstr.FromInt(int(router.Spec.Port)),
+									},
+								},
+							},
+						},
+					},
+				},
+			},
+		},
+	}
+
+	// Add node affinity if specified
+	if router.Spec.NodeSelectorTerms != nil {
+		dep.Spec.Template.Spec.Affinity = &corev1.Affinity{
+			NodeAffinity: &corev1.NodeAffinity{
+				RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{
+					NodeSelectorTerms: router.Spec.NodeSelectorTerms,
+				},
+			},
+		}
+	}
+
+	// Set the owner reference; a failure is logged instead of being silently discarded
+	if err := ctrl.SetControllerReference(router, dep, r.Scheme); err != nil {
+		log.Log.Error(err, "Failed to set owner reference", "Deployment.Namespace", dep.Namespace, "Deployment.Name", dep.Name)
+	}
+	return dep
+}
+
+// deploymentNeedsUpdate reports whether dep's image or resources differ from what router's spec renders.
+func (r *VLLMRouterReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, router *servingv1alpha1.VLLMRouter) bool {
+	want := r.deploymentForVLLMRouter(router).Spec.Template.Spec.Containers[0]
+	have := dep.Spec.Template.Spec.Containers[0]
+	// Resources are compared by canonical quantity string. A raw reflect.DeepEqual treats the
+	// nil maps and re-encoded quantities read back from the API server as a permanent
+	// difference, which would make every reconcile issue an update and requeue.
+	flatten := func(rr corev1.ResourceRequirements) map[string]string {
+		out := map[string]string{}
+		for name, q := range rr.Requests {
+			out["requests/"+string(name)] = q.String()
+		}
+		for name, q := range rr.Limits {
+			out["limits/"+string(name)] = q.String()
+		}
+		return out
+	}
+	return want.Image != have.Image || !reflect.DeepEqual(flatten(want.Resources), flatten(have.Resources))
+}
+
+// updateStatus maps dep's replica counts to the VLLMRouter status and writes it only when it changes.
+func (r *VLLMRouterReconciler) updateStatus(ctx context.Context, router *servingv1alpha1.VLLMRouter, dep *appsv1.Deployment) error {
+	latestRouter := &servingv1alpha1.VLLMRouter{} // re-read so the write is based on the latest version
+	if err := r.Get(ctx, types.NamespacedName{Name: router.Name, Namespace: router.Namespace}, latestRouter); err != nil {
+		return err
+	}
+	previous := latestRouter.Status.Status
+	if dep.Status.AvailableReplicas > 0 {
+		latestRouter.Status.Status = "Ready"
+	} else if dep.Status.UpdatedReplicas > 0 {
+		latestRouter.Status.Status = "Updating"
+	} else {
+		latestRouter.Status.Status = "NotReady"
+	}
+	// Stamping LastUpdated on every pass would change the object and queue yet another reconcile.
+	if latestRouter.Status.Status == previous {
+		return nil
+	}
+	latestRouter.Status.LastUpdated = metav1.Now()
+	return r.Status().Update(ctx, latestRouter)
+}
+
+// serviceForVLLMRouter builds the ClusterIP Service for router: it selects pods labelled
+// app=<router name> and forwards port 80 to the container port from the spec.
+func (r *VLLMRouterReconciler) serviceForVLLMRouter(router *servingv1alpha1.VLLMRouter) *corev1.Service {
+	svc := &corev1.Service{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      router.Name,
+			Namespace: router.Namespace,
+		},
+		Spec: corev1.ServiceSpec{
+			Type:     corev1.ServiceTypeClusterIP,
+			Selector: map[string]string{"app": router.Name},
+			Ports: []corev1.ServicePort{
+				{
+					Name:       "http",
+					Port:       80,
+					TargetPort: intstr.FromInt(int(router.Spec.Port)),
+					Protocol:   corev1.ProtocolTCP,
+				},
+			},
+		},
+	}
+
+	// Set the owner reference; a failure is logged instead of being silently discarded,
+	// because the signature has no error return to carry it.
+	if err := ctrl.SetControllerReference(router, svc, r.Scheme); err != nil {
+		log.Log.Error(err, "Failed to set owner reference", "Service.Namespace", svc.Namespace, "Service.Name", svc.Name)
+	}
+	return svc
+}
+
+// SetupWithManager registers the reconciler with mgr for VLLMRouter objects and for the
+// Deployments and Services they own.
+func (r *VLLMRouterReconciler) SetupWithManager(mgr ctrl.Manager) error {
+	bldr := ctrl.NewControllerManagedBy(mgr).For(&servingv1alpha1.VLLMRouter{})
+	bldr = bldr.Owns(&appsv1.Deployment{}).Owns(&corev1.Service{})
+
+	return bldr.Complete(r)
+}
diff --git a/internal/controller/vllmrouter_controller_test.go b/internal/controller/vllmrouter_controller_test.go
new file mode 100644
index 000000000..fe68a9f5c
--- /dev/null
+++ b/internal/controller/vllmrouter_controller_test.go
@@ -0,0 +1,84 @@
+/*
+Copyright 2025.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package controller
+
+import (
+	"context"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+	"k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/types"
+	"sigs.k8s.io/controller-runtime/pkg/reconcile"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+	servingv1alpha1 "production-stack/api/v1alpha1"
+)
+
+var _ = Describe("VLLMRouter Controller", func() {
+	Context("When reconciling a resource", func() {
+		const resourceName = "test-resource"
+
+		// key addresses the VLLMRouter fixture shared by every spec in this context.
+		key := types.NamespacedName{
+			Namespace: "default", // TODO(user):Modify as needed
+			Name:      resourceName,
+		}
+		ctx := context.Background()
+		router := &servingv1alpha1.VLLMRouter{}
+
+		BeforeEach(func() {
+			By("creating the custom resource for the Kind VLLMRouter")
+			// Create the fixture only when the lookup reports it missing; a hit or any
+			// other lookup error leaves the cluster untouched.
+			if lookupErr := k8sClient.Get(ctx, key, router); errors.IsNotFound(lookupErr) {
+				fixture := &servingv1alpha1.VLLMRouter{
+					// TODO(user): Specify other spec details if needed.
+					ObjectMeta: metav1.ObjectMeta{
+						Namespace: "default",
+						Name:      resourceName,
+					},
+				}
+				Expect(k8sClient.Create(ctx, fixture)).To(Succeed())
+			}
+		})
+
+		AfterEach(func() {
+			// TODO(user): Cleanup logic after each test, like removing the resource instance.
+			existing := &servingv1alpha1.VLLMRouter{}
+			Expect(k8sClient.Get(ctx, key, existing)).NotTo(HaveOccurred())
+
+			By("Cleanup the specific resource instance VLLMRouter")
+			Expect(k8sClient.Delete(ctx, existing)).To(Succeed())
+		})
+
+		It("should successfully reconcile the resource", func() {
+			By("Reconciling the created resource")
+			reconciler := &VLLMRouterReconciler{
+				Scheme: k8sClient.Scheme(),
+				Client: k8sClient,
+			}
+			request := reconcile.Request{NamespacedName: key}
+
+			_, err := reconciler.Reconcile(ctx, request)
+			Expect(err).NotTo(HaveOccurred())
+			// TODO(user): Add more specific assertions depending on your controller's reconciliation logic.
+			// Example: If you expect a certain status condition after reconciliation, verify it here.
+		})
+	})
+})
diff --git a/internal/controller/vllmruntime_controller.go b/internal/controller/vllmruntime_controller.go
new file mode 100644
index 000000000..97cb59885
--- /dev/null
+++ b/internal/controller/vllmruntime_controller.go
@@ -0,0 +1,534 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package controller
+
+import (
+	"context"
+	"fmt"
+	"reflect"
+
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/api/resource"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/apimachinery/pkg/util/intstr"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/log"
+
+	servingv1alpha1 "production-stack/api/v1alpha1"
+)
+
+// VLLMRuntimeReconciler reconciles a VLLMRuntime object into a Service and a Deployment of the same name.
+type VLLMRuntimeReconciler struct {
+	client.Client
+	Scheme *runtime.Scheme // handed to ctrl.SetControllerReference when child objects are built
+}
+
+// +kubebuilder:rbac:groups=serving.vllm.ai,resources=vllmruntimes,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=serving.vllm.ai,resources=vllmruntimes/status,verbs=get;update;patch
+// +kubebuilder:rbac:groups=serving.vllm.ai,resources=vllmruntimes/finalizers,verbs=update
+// +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=core,resources=services,verbs=get;list;watch;create;update;patch;delete
+
+// Reconcile makes sure the Service and Deployment named after the VLLMRuntime exist, pushes spec
+// changes onto the live objects and records readiness in the status; each write requeues.
+func (r *VLLMRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+	log := log.FromContext(ctx)
+
+	// Fetch the VLLMRuntime instance
+	vllmRuntime := &servingv1alpha1.VLLMRuntime{}
+	err := r.Get(ctx, req.NamespacedName, vllmRuntime)
+	if err != nil {
+		if errors.IsNotFound(err) {
+			// Request object not found, could have been deleted after reconcile request.
+			// Return and don't requeue
+			log.Info("VLLMRuntime resource not found. Ignoring since object must be deleted")
+			return ctrl.Result{}, nil
+		}
+		// Error reading the object - requeue the request.
+		log.Error(err, "Failed to get VLLMRuntime")
+		return ctrl.Result{}, err
+	}
+
+	// Check if the service already exists, if not create a new one
+	foundService := &corev1.Service{}
+	err = r.Get(ctx, types.NamespacedName{Name: vllmRuntime.Name, Namespace: vllmRuntime.Namespace}, foundService)
+	if errors.IsNotFound(err) {
+		// Define a new service
+		svc := r.serviceForVLLMRuntime(vllmRuntime)
+		log.Info("Creating a new Service", "Service.Namespace", svc.Namespace, "Service.Name", svc.Name)
+		err = r.Create(ctx, svc)
+		if err != nil {
+			log.Error(err, "Failed to create new Service", "Service.Namespace", svc.Namespace, "Service.Name", svc.Name)
+			return ctrl.Result{}, err
+		}
+		// Service created successfully - return and requeue
+		return ctrl.Result{Requeue: true}, nil
+	} else if err != nil {
+		log.Error(err, "Failed to get Service")
+		return ctrl.Result{}, err
+	}
+
+	// Update the service if needed
+	if r.serviceNeedsUpdate(foundService, vllmRuntime) {
+		log.Info("Updating Service", "Service.Namespace", foundService.Namespace, "Service.Name", foundService.Name)
+		// Copy only the desired ports onto the live Service: sending a freshly built object
+		// would omit its resourceVersion and the cluster IP the API server allocated for it.
+		foundService.Spec.Ports = r.serviceForVLLMRuntime(vllmRuntime).Spec.Ports
+		err = r.Update(ctx, foundService)
+		if err != nil {
+			log.Error(err, "Failed to update Service", "Service.Namespace", foundService.Namespace, "Service.Name", foundService.Name)
+			return ctrl.Result{}, err
+		}
+		// Service updated successfully - return and requeue
+		return ctrl.Result{Requeue: true}, nil
+	}
+
+	// Check if the deployment already exists, if not create a new one
+	found := &appsv1.Deployment{}
+	err = r.Get(ctx, types.NamespacedName{Name: vllmRuntime.Name, Namespace: vllmRuntime.Namespace}, found)
+	if errors.IsNotFound(err) {
+		// Define a new deployment
+		dep := r.deploymentForVLLMRuntime(vllmRuntime)
+		log.Info("Creating a new Deployment", "Deployment.Namespace", dep.Namespace, "Deployment.Name", dep.Name)
+		err = r.Create(ctx, dep)
+		if err != nil {
+			log.Error(err, "Failed to create new Deployment", "Deployment.Namespace", dep.Namespace, "Deployment.Name", dep.Name)
+			return ctrl.Result{}, err
+		}
+		// Deployment created successfully - return and requeue
+		return ctrl.Result{Requeue: true}, nil
+	} else if err != nil {
+		log.Error(err, "Failed to get Deployment")
+		return ctrl.Result{}, err
+	}
+
+	// Update the deployment if needed
+	if r.deploymentNeedsUpdate(found, vllmRuntime) {
+		log.Info("Updating Deployment", "Deployment.Namespace", found.Namespace, "Deployment.Name", found.Name)
+		// Replace only the spec of the live object: updating a freshly built Deployment would
+		// drop its resourceVersion and the metadata the API server and controllers have added.
+		found.Spec = r.deploymentForVLLMRuntime(vllmRuntime).Spec
+		err = r.Update(ctx, found)
+		if err != nil {
+			log.Error(err, "Failed to update Deployment", "Deployment.Namespace", found.Namespace, "Deployment.Name", found.Name)
+			return ctrl.Result{}, err
+		}
+		// Deployment updated successfully - return and requeue
+		return ctrl.Result{Requeue: true}, nil
+	}
+
+	// Update the status
+	if err := r.updateStatus(ctx, vllmRuntime, found); err != nil {
+		log.Error(err, "Failed to update VLLMRuntime status")
+		return ctrl.Result{}, err
+	}
+
+	return ctrl.Result{}, nil
+}
+
+// deploymentForVLLMRuntime renders the Deployment for vllmRuntime: a single "vllm" container
+// running the OpenAI-compatible API server module, with command-line args, environment
+// variables and resources derived from the spec. It panics when a CPU, memory or GPU
+// quantity in the spec does not parse.
+func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *servingv1alpha1.VLLMRuntime) *appsv1.Deployment {
+	labels := map[string]string{
+		"app": vllmRuntime.Name,
+	}
+
+	// Build command line arguments. deploymentNeedsUpdate reads the model URL back from
+	// index 1, so "--model" and its value must stay the first two elements.
+	args := []string{
+		"--model",
+		vllmRuntime.Spec.Model.ModelURL,
+		"--host",
+		"0.0.0.0",
+		"--port",
+		fmt.Sprintf("%d", vllmRuntime.Spec.Port),
+	}
+
+	if vllmRuntime.Spec.Model.EnableLoRA {
+		args = append(args, "--enable-lora")
+	}
+
+	if vllmRuntime.Spec.Model.EnableTool {
+		args = append(args, "--enable-auto-tool-choice")
+	}
+
+	if vllmRuntime.Spec.Model.ToolCallParser != "" {
+		args = append(args, "--tool-call-parser", vllmRuntime.Spec.Model.ToolCallParser)
+	}
+
+	if vllmRuntime.Spec.EnableChunkedPrefill {
+		args = append(args, "--enable-chunked-prefill")
+	} else {
+		args = append(args, "--no-enable-chunked-prefill")
+	}
+
+	if vllmRuntime.Spec.EnablePrefixCaching {
+		args = append(args, "--enable-prefix-caching")
+	} else {
+		args = append(args, "--no-enable-prefix-caching")
+	}
+
+	if vllmRuntime.Spec.Model.MaxModelLen > 0 {
+		args = append(args, "--max-model-len", fmt.Sprintf("%d", vllmRuntime.Spec.Model.MaxModelLen))
+	}
+
+	if vllmRuntime.Spec.Model.DType != "" {
+		args = append(args, "--dtype", vllmRuntime.Spec.Model.DType)
+	}
+
+	if vllmRuntime.Spec.TensorParallelSize > 0 {
+		args = append(args, "--tensor-parallel-size", fmt.Sprintf("%d", vllmRuntime.Spec.TensorParallelSize))
+	}
+
+	if vllmRuntime.Spec.Model.MaxNumSeqs > 0 {
+		args = append(args, "--max-num-seqs", fmt.Sprintf("%d", vllmRuntime.Spec.Model.MaxNumSeqs))
+	}
+
+	if vllmRuntime.Spec.GpuMemoryUtilization != "" {
+		args = append(args, "--gpu_memory_utilization", vllmRuntime.Spec.GpuMemoryUtilization)
+	}
+
+	if vllmRuntime.Spec.MaxLoras > 0 {
+		args = append(args, "--max_loras", fmt.Sprintf("%d", vllmRuntime.Spec.MaxLoras))
+	}
+
+	args = append(args, vllmRuntime.Spec.ExtraArgs...)
+
+	// Build environment variables, starting with the VLLM_USE_V1 toggle
+	useV1 := "0"
+	if vllmRuntime.Spec.V1 {
+		useV1 = "1"
+	}
+	env := []corev1.EnvVar{{Name: "VLLM_USE_V1", Value: useV1}}
+
+	// LM Cache configuration. deploymentNeedsUpdate reads these variables back from the
+	// live Deployment, so their names must stay in sync with it.
+	if vllmRuntime.Spec.LMCacheConfig.Enabled {
+		env = append(env,
+			corev1.EnvVar{
+				Name:  "LMCACHE_LOG_LEVEL",
+				Value: "DEBUG",
+			},
+			corev1.EnvVar{
+				Name:  "LMCACHE_USE_EXPERIMENTAL",
+				Value: "True",
+			},
+			corev1.EnvVar{
+				Name:  "VLLM_RPC_TIMEOUT",
+				Value: "1000000",
+			},
+		)
+
+		if vllmRuntime.Spec.LMCacheConfig.CPUOffloadingBufferSize != "" {
+			env = append(env,
+				corev1.EnvVar{
+					Name:  "LMCACHE_LOCAL_CPU",
+					Value: "True",
+				},
+				corev1.EnvVar{
+					Name:  "LMCACHE_MAX_LOCAL_CPU_SIZE",
+					Value: vllmRuntime.Spec.LMCacheConfig.CPUOffloadingBufferSize,
+				},
+			)
+		}
+
+		if vllmRuntime.Spec.LMCacheConfig.DiskOffloadingBufferSize != "" {
+			env = append(env,
+				corev1.EnvVar{
+					Name:  "LMCACHE_LOCAL_DISK",
+					Value: "True",
+				},
+				corev1.EnvVar{
+					Name:  "LMCACHE_MAX_LOCAL_DISK_SIZE",
+					Value: vllmRuntime.Spec.LMCacheConfig.DiskOffloadingBufferSize,
+				},
+			)
+		}
+
+		if vllmRuntime.Spec.LMCacheConfig.RemoteURL != "" {
+			env = append(env,
+				corev1.EnvVar{
+					Name:  "LMCACHE_REMOTE_URL",
+					Value: vllmRuntime.Spec.LMCacheConfig.RemoteURL,
+				},
+				corev1.EnvVar{
+					Name:  "LMCACHE_REMOTE_SERDE",
+					Value: vllmRuntime.Spec.LMCacheConfig.RemoteSerde,
+				},
+			)
+		}
+	}
+
+	// Add user-defined environment variables (ranging over an unset list is a no-op)
+	for _, e := range vllmRuntime.Spec.Env {
+		env = append(env, corev1.EnvVar{
+			Name:  e.Name,
+			Value: e.Value,
+		})
+	}
+
+	// Build resource requirements; requests and limits always receive the same value.
+	// resource.MustParse panics on a malformed quantity string.
+	resources := corev1.ResourceRequirements{
+		Requests: corev1.ResourceList{},
+		Limits:   corev1.ResourceList{},
+	}
+
+	if vllmRuntime.Spec.Resources.CPU != "" {
+		resources.Requests[corev1.ResourceCPU] = resource.MustParse(vllmRuntime.Spec.Resources.CPU)
+		resources.Limits[corev1.ResourceCPU] = resource.MustParse(vllmRuntime.Spec.Resources.CPU)
+	}
+
+	if vllmRuntime.Spec.Resources.Memory != "" {
+		resources.Requests[corev1.ResourceMemory] = resource.MustParse(vllmRuntime.Spec.Resources.Memory)
+		resources.Limits[corev1.ResourceMemory] = resource.MustParse(vllmRuntime.Spec.Resources.Memory)
+	}
+
+	if vllmRuntime.Spec.Resources.GPU != "" {
+		// Parse GPU resource as a decimal value
+		gpuResource := resource.MustParse(vllmRuntime.Spec.Resources.GPU)
+		resources.Requests["nvidia.com/gpu"] = gpuResource
+		resources.Limits["nvidia.com/gpu"] = gpuResource
+	}
+
+	// Build the image reference as <registry>/<name>; no default is filled in here
+	image := vllmRuntime.Spec.Image.Registry + "/" + vllmRuntime.Spec.Image.Name
+
+	// Get the image pull policy
+	imagePullPolicy := corev1.PullIfNotPresent
+	if vllmRuntime.Spec.Image.PullPolicy != "" {
+		imagePullPolicy = corev1.PullPolicy(vllmRuntime.Spec.Image.PullPolicy)
+	}
+
+	// Build image pull secrets
+	var imagePullSecrets []corev1.LocalObjectReference
+	if vllmRuntime.Spec.Image.PullSecretName != "" {
+		imagePullSecrets = append(imagePullSecrets, corev1.LocalObjectReference{
+			Name: vllmRuntime.Spec.Image.PullSecretName,
+		})
+	}
+
+	// Expose the Hugging Face token from the referenced Secret as HF_TOKEN.
+	// NOTE(review): HFTokenName is not checked for emptiness here - confirm the CRD requires it.
+	if vllmRuntime.Spec.HFTokenSecret.Name != "" {
+		env = append(env, corev1.EnvVar{
+			Name: "HF_TOKEN",
+			ValueFrom: &corev1.EnvVarSource{
+				SecretKeyRef: &corev1.SecretKeySelector{
+					LocalObjectReference: vllmRuntime.Spec.HFTokenSecret,
+					Key:                  vllmRuntime.Spec.HFTokenName,
+				},
+			},
+		})
+	}
+
+	// Assemble the Deployment around the single "vllm" container
+	dep := &appsv1.Deployment{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      vllmRuntime.Name,
+			Namespace: vllmRuntime.Namespace,
+		},
+		Spec: appsv1.DeploymentSpec{
+			Replicas: &vllmRuntime.Spec.Replicas,
+			Strategy: appsv1.DeploymentStrategy{
+				Type: appsv1.DeploymentStrategyType(vllmRuntime.Spec.DeployStrategy),
+			},
+			Selector: &metav1.LabelSelector{
+				MatchLabels: labels,
+			},
+			Template: corev1.PodTemplateSpec{
+				ObjectMeta: metav1.ObjectMeta{
+					Labels: labels,
+				},
+				Spec: corev1.PodSpec{
+					ImagePullSecrets: imagePullSecrets,
+					Containers: []corev1.Container{
+						{
+							Name:            "vllm",
+							Image:           image,
+							ImagePullPolicy: imagePullPolicy,
+							Command:         []string{"python3", "-m", "vllm.entrypoints.openai.api_server"},
+							Args:            args,
+							Env:             env,
+							Ports: []corev1.ContainerPort{
+								{
+									Name:          "http",
+									ContainerPort: vllmRuntime.Spec.Port,
+								},
+							},
+							Resources: resources,
+						},
+					},
+				},
+			},
+		},
+	}
+
+	// Set the owner reference; a failure is logged instead of being silently discarded
+	if err := ctrl.SetControllerReference(vllmRuntime, dep, r.Scheme); err != nil {
+		log.Log.Error(err, "Failed to set owner reference", "Deployment.Namespace", dep.Namespace, "Deployment.Name", dep.Name)
+	}
+	return dep
+}
+
+// deploymentNeedsUpdate checks if the deployment needs to be updated
+func (r *VLLMRuntimeReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, vr *servingv1alpha1.VLLMRuntime) bool {
+	// Generate the expected deployment
+	expectedDep := r.deploymentForVLLMRuntime(vr)
+
+	// Compare model URL
+	expectedModelURL := vr.Spec.Model.ModelURL
+	actualModelURL := ""
+	// For vllm serve, the model URL is the first argument after the command
+	if len(dep.Spec.Template.Spec.Containers[0].Args) > 0 {
+		actualModelURL = dep.Spec.Template.Spec.Containers[0].Args[1]
+	}
+	if expectedModelURL != actualModelURL {
+		return true
+	}
+
+	// Compare port
+	expectedPort := vr.Spec.Port
+	actualPort := dep.Spec.Template.Spec.Containers[0].Ports[0].ContainerPort
+	if expectedPort != actualPort {
+		return true
+	}
+
+	// Compare image
+	if expectedDep.Spec.Template.Spec.Containers[0].Image != dep.Spec.Template.Spec.Containers[0].Image {
+		return true
+	}
+
+	// Compare resources
+	expectedResources := expectedDep.Spec.Template.Spec.Containers[0].Resources
+	actualResources := dep.Spec.Template.Spec.Containers[0].Resources
+	if !reflect.DeepEqual(expectedResources, actualResources) {
+		return true
+	}
+
+	// Compare LM Cache configuration
+	expectedLMCacheConfig := vr.Spec.LMCacheConfig
+	actualLMCacheConfig := dep.Spec.Template.Spec.Containers[0].Env
+
+	// Extract actual values from environment variables
+	actualEnabled := false
+	actualCPUOffloadingBufferSize := ""
+	actualDiskOffloadingBufferSize := ""
+
+	for _, env := range actualLMCacheConfig {
+		switch env.Name {
+		case "LMCACHE_USE_EXPERIMENTAL":
+			actualEnabled = env.Value == "True"
+		case "LMCACHE_MAX_LOCAL_CPU_SIZE":
+			actualCPUOffloadingBufferSize = env.Value
+		case "LMCACHE_MAX_LOCAL_DISK_SIZE":
+			actualDiskOffloadingBufferSize = env.Value
+		}
+	}
+
+	// Compare specific fields
+	if expectedLMCacheConfig.Enabled != actualEnabled ||
+		expectedLMCacheConfig.CPUOffloadingBufferSize != actualCPUOffloadingBufferSize ||
+		expectedLMCacheConfig.DiskOffloadingBufferSize != actualDiskOffloadingBufferSize {
+		return true
+	}
+
+	return false
+}
+
+// updateStatus maps dep's replica counts to the VLLMRuntime model status and writes it only when it changes.
+func (r *VLLMRuntimeReconciler) updateStatus(ctx context.Context, vr *servingv1alpha1.VLLMRuntime, dep *appsv1.Deployment) error {
+	// Re-read the VLLMRuntime to get the latest version
+	latestVR := &servingv1alpha1.VLLMRuntime{}
+	if err := r.Get(ctx, types.NamespacedName{Name: vr.Name, Namespace: vr.Namespace}, latestVR); err != nil {
+		return err
+	}
+	previous := latestVR.Status.ModelStatus
+	if dep.Status.AvailableReplicas > 0 {
+		latestVR.Status.ModelStatus = "Ready"
+	} else if dep.Status.UpdatedReplicas > 0 {
+		latestVR.Status.ModelStatus = "Updating"
+	} else {
+		latestVR.Status.ModelStatus = "NotReady"
+	}
+	// Stamping LastUpdated on every pass would change the object and queue yet another reconcile.
+	if latestVR.Status.ModelStatus == previous {
+		return nil
+	}
+	latestVR.Status.LastUpdated = metav1.Now()
+	return r.Status().Update(ctx, latestVR)
+}
+
+// serviceForVLLMRuntime builds the ClusterIP Service for vllmRuntime: it selects pods labelled
+// app=<runtime name> and forwards port 80 to the container port from the spec.
+func (r *VLLMRuntimeReconciler) serviceForVLLMRuntime(vllmRuntime *servingv1alpha1.VLLMRuntime) *corev1.Service {
+	svc := &corev1.Service{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      vllmRuntime.Name,
+			Namespace: vllmRuntime.Namespace,
+		},
+		Spec: corev1.ServiceSpec{
+			Type:     corev1.ServiceTypeClusterIP,
+			Selector: map[string]string{"app": vllmRuntime.Name},
+			Ports: []corev1.ServicePort{
+				{
+					Name:       "http",
+					Port:       80,
+					TargetPort: intstr.FromInt(int(vllmRuntime.Spec.Port)),
+					Protocol:   corev1.ProtocolTCP,
+				},
+			},
+		},
+	}
+
+	// Set the owner reference; a failure is logged instead of being silently discarded,
+	// because the signature has no error return to carry it.
+	if err := ctrl.SetControllerReference(vllmRuntime, svc, r.Scheme); err != nil {
+		log.Log.Error(err, "Failed to set owner reference", "Service.Namespace", svc.Namespace, "Service.Name", svc.Name)
+	}
+	return svc
+}
+
+// serviceNeedsUpdate reports whether the Service's first port forwards to a different target
+// port than the one in vr's spec. No other Service field is examined.
+func (r *VLLMRuntimeReconciler) serviceNeedsUpdate(svc *corev1.Service, vr *servingv1alpha1.VLLMRuntime) bool {
+	desired := int(vr.Spec.Port)
+
+	// Compare target port
+	live := svc.Spec.Ports[0].TargetPort
+	needsUpdate := live.IntValue() != desired
+
+	return needsUpdate
+}
+
+// SetupWithManager registers the reconciler with mgr for VLLMRuntime objects and for the
+// Deployments and Services they own.
+func (r *VLLMRuntimeReconciler) SetupWithManager(mgr ctrl.Manager) error {
+	bldr := ctrl.NewControllerManagedBy(mgr).For(&servingv1alpha1.VLLMRuntime{})
+	bldr = bldr.Owns(&appsv1.Deployment{}).Owns(&corev1.Service{})
+
+	return bldr.Complete(r)
+}
diff --git a/internal/controller/vllmruntime_controller_test.go b/internal/controller/vllmruntime_controller_test.go
new file mode 100644
index 000000000..9f07fc776
--- /dev/null
+++ b/internal/controller/vllmruntime_controller_test.go
@@ -0,0 +1,84 @@
+/*
+Copyright 2025.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package controller
+
+import (
+	"context"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+	"k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/types"
+	"sigs.k8s.io/controller-runtime/pkg/reconcile"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+	servingv1alpha1 "production-stack/api/v1alpha1"
+)
+
+var _ = Describe("VLLMRuntime Controller", func() {
+	Context("When reconciling a resource", func() {
+		const resourceName = "test-resource"
+
+		ctx := context.Background()
+		// Key shared by every spec to look up the VLLMRuntime under test.
+		typeNamespacedName := types.NamespacedName{
+			Name:      resourceName,
+			Namespace: "default", // TODO(user): Modify as needed
+		}
+		vllmruntime := &servingv1alpha1.VLLMRuntime{}
+
+		BeforeEach(func() {
+			By("creating the custom resource for the Kind VLLMRuntime")
+			err := k8sClient.Get(ctx, typeNamespacedName, vllmruntime)
+			if errors.IsNotFound(err) {
+				err = k8sClient.Create(ctx, &servingv1alpha1.VLLMRuntime{
+					ObjectMeta: metav1.ObjectMeta{
+						Name:      resourceName,
+						Namespace: "default",
+					},
+					// TODO(user): Specify other spec details if needed.
+				})
+			}
+			Expect(err).NotTo(HaveOccurred()) // also surfaces Get errors other than NotFound
+		})
+
+		AfterEach(func() {
+			// Delete the resource fetched by name so the next spec's BeforeEach recreates it.
+			resource := &servingv1alpha1.VLLMRuntime{}
+			err := k8sClient.Get(ctx, typeNamespacedName, resource)
+			Expect(err).NotTo(HaveOccurred())
+
+			By("Cleanup the specific resource instance VLLMRuntime")
+			Expect(k8sClient.Delete(ctx, resource)).To(Succeed())
+		})
+		It("should successfully reconcile the resource", func() {
+			By("Reconciling the created resource")
+			controllerReconciler := &VLLMRuntimeReconciler{
+				Client: k8sClient,
+				Scheme: k8sClient.Scheme(),
+			}
+
+			_, err := controllerReconciler.Reconcile(ctx, reconcile.Request{
+				NamespacedName: typeNamespacedName,
+			})
+			Expect(err).NotTo(HaveOccurred())
+			// TODO(user): Add more specific assertions depending on your controller's reconciliation logic.
+			// Example: If you expect a certain status condition after reconciliation, verify it here.
+		})
+	})
+})

From 7907cd0ae4f52e4b46979b6d0f43d0802bd7a781 Mon Sep 17 00:00:00 2001
From: royyhuang <roy.y.huang@gmail.com>
Date: Fri, 9 May 2025 01:13:09 +0000
Subject: [PATCH 02/14] move operator to a secondary dir instead of in root dir

Signed-off-by: royyhuang <roy.y.huang@gmail.com>
---
 .gitignore                                    |   3 +
 ...production-stack.vllm.ai_staticroutes.yaml | 218 ------------------
 .dockerignore => operator/.dockerignore       |   0
 Dockerfile => operator/Dockerfile             |   0
 Makefile => operator/Makefile                 |   0
 PROJECT => operator/PROJECT                   |   0
 .../api}/v1alpha1/groupversion_info.go        |   0
 .../api}/v1alpha1/vllmrouter_types.go         |   0
 .../api}/v1alpha1/vllmruntime_types.go        |   0
 .../api}/v1alpha1/zz_generated.deepcopy.go    |   0
 {cmd => operator/cmd}/main.go                 |   0
 .../bases/serving.vllm.ai_vllmrouters.yaml    |   0
 .../bases/serving.vllm.ai_vllmruntimes.yaml   |   0
 .../config}/crd/kustomization.yaml            |   0
 .../config}/crd/kustomizeconfig.yaml          |   0
 .../default/cert_metrics_manager_patch.yaml   |   0
 .../config}/default/kustomization.yaml        |   0
 .../default/manager_metrics_patch.yaml        |   0
 .../config}/default/metrics_service.yaml      |   0
 .../config}/manager/deployment.yaml           |   0
 .../config}/manager/kustomization.yaml        |   0
 .../config}/manager/namespace.yaml            |   0
 .../network-policy/allow-metrics-traffic.yaml |   0
 .../config}/network-policy/kustomization.yaml |   0
 .../config}/prometheus/kustomization.yaml     |   0
 .../config}/prometheus/monitor.yaml           |   0
 .../config}/prometheus/monitor_tls_patch.yaml |   0
 .../config}/rbac/kustomization.yaml           |   0
 .../config}/rbac/leader_election_role.yaml    |   0
 .../rbac/leader_election_role_binding.yaml    |   0
 .../config}/rbac/metrics_auth_role.yaml       |   0
 .../rbac/metrics_auth_role_binding.yaml       |   0
 .../config}/rbac/metrics_reader_role.yaml     |   0
 .../config}/rbac/pod_viewer_role.yaml         |   0
 {config => operator/config}/rbac/role.yaml    |  34 ---
 .../config}/rbac/role_binding.yaml            |   0
 .../config}/rbac/service_account.yaml         |   0
 .../config}/rbac/vllmrouter_admin_role.yaml   |   0
 .../config}/rbac/vllmrouter_editor_role.yaml  |   0
 .../config}/rbac/vllmrouter_role_binding.yaml |   0
 .../rbac/vllmrouter_service_account.yaml      |   0
 .../config}/rbac/vllmrouter_viewer_role.yaml  |   0
 .../config}/rbac/vllmruntime_admin_role.yaml  |   0
 .../config}/rbac/vllmruntime_editor_role.yaml |   0
 .../config}/rbac/vllmruntime_viewer_role.yaml |   0
 .../config}/samples/kustomization.yaml        |   0
 .../samples/serving_v1alpha1_router.yaml      |   0
 .../samples/serving_v1alpha1_vllmruntime.yaml |   0
 go.mod => operator/go.mod                     |   0
 go.sum => operator/go.sum                     |   0
 {hack => operator/hack}/boilerplate.go.txt    |   0
 .../internal}/controller/suite_test.go        |   0
 .../controller/vllmrouter_controller.go       |   0
 .../controller/vllmrouter_controller_test.go  |   0
 .../controller/vllmruntime_controller.go      |   0
 .../controller/vllmruntime_controller_test.go |   0
 56 files changed, 3 insertions(+), 252 deletions(-)
 delete mode 100644 config/crd/bases/production-stack.vllm.ai_staticroutes.yaml
 rename .dockerignore => operator/.dockerignore (100%)
 rename Dockerfile => operator/Dockerfile (100%)
 rename Makefile => operator/Makefile (100%)
 rename PROJECT => operator/PROJECT (100%)
 rename {api => operator/api}/v1alpha1/groupversion_info.go (100%)
 rename {api => operator/api}/v1alpha1/vllmrouter_types.go (100%)
 rename {api => operator/api}/v1alpha1/vllmruntime_types.go (100%)
 rename {api => operator/api}/v1alpha1/zz_generated.deepcopy.go (100%)
 rename {cmd => operator/cmd}/main.go (100%)
 rename {config => operator/config}/crd/bases/serving.vllm.ai_vllmrouters.yaml (100%)
 rename {config => operator/config}/crd/bases/serving.vllm.ai_vllmruntimes.yaml (100%)
 rename {config => operator/config}/crd/kustomization.yaml (100%)
 rename {config => operator/config}/crd/kustomizeconfig.yaml (100%)
 rename {config => operator/config}/default/cert_metrics_manager_patch.yaml (100%)
 rename {config => operator/config}/default/kustomization.yaml (100%)
 rename {config => operator/config}/default/manager_metrics_patch.yaml (100%)
 rename {config => operator/config}/default/metrics_service.yaml (100%)
 rename {config => operator/config}/manager/deployment.yaml (100%)
 rename {config => operator/config}/manager/kustomization.yaml (100%)
 rename {config => operator/config}/manager/namespace.yaml (100%)
 rename {config => operator/config}/network-policy/allow-metrics-traffic.yaml (100%)
 rename {config => operator/config}/network-policy/kustomization.yaml (100%)
 rename {config => operator/config}/prometheus/kustomization.yaml (100%)
 rename {config => operator/config}/prometheus/monitor.yaml (100%)
 rename {config => operator/config}/prometheus/monitor_tls_patch.yaml (100%)
 rename {config => operator/config}/rbac/kustomization.yaml (100%)
 rename {config => operator/config}/rbac/leader_election_role.yaml (100%)
 rename {config => operator/config}/rbac/leader_election_role_binding.yaml (100%)
 rename {config => operator/config}/rbac/metrics_auth_role.yaml (100%)
 rename {config => operator/config}/rbac/metrics_auth_role_binding.yaml (100%)
 rename {config => operator/config}/rbac/metrics_reader_role.yaml (100%)
 rename {config => operator/config}/rbac/pod_viewer_role.yaml (100%)
 rename {config => operator/config}/rbac/role.yaml (63%)
 rename {config => operator/config}/rbac/role_binding.yaml (100%)
 rename {config => operator/config}/rbac/service_account.yaml (100%)
 rename {config => operator/config}/rbac/vllmrouter_admin_role.yaml (100%)
 rename {config => operator/config}/rbac/vllmrouter_editor_role.yaml (100%)
 rename {config => operator/config}/rbac/vllmrouter_role_binding.yaml (100%)
 rename {config => operator/config}/rbac/vllmrouter_service_account.yaml (100%)
 rename {config => operator/config}/rbac/vllmrouter_viewer_role.yaml (100%)
 rename {config => operator/config}/rbac/vllmruntime_admin_role.yaml (100%)
 rename {config => operator/config}/rbac/vllmruntime_editor_role.yaml (100%)
 rename {config => operator/config}/rbac/vllmruntime_viewer_role.yaml (100%)
 rename {config => operator/config}/samples/kustomization.yaml (100%)
 rename {config => operator/config}/samples/serving_v1alpha1_router.yaml (100%)
 rename {config => operator/config}/samples/serving_v1alpha1_vllmruntime.yaml (100%)
 rename go.mod => operator/go.mod (100%)
 rename go.sum => operator/go.sum (100%)
 rename {hack => operator/hack}/boilerplate.go.txt (100%)
 rename {internal => operator/internal}/controller/suite_test.go (100%)
 rename {internal => operator/internal}/controller/vllmrouter_controller.go (100%)
 rename {internal => operator/internal}/controller/vllmrouter_controller_test.go (100%)
 rename {internal => operator/internal}/controller/vllmruntime_controller.go (100%)
 rename {internal => operator/internal}/controller/vllmruntime_controller_test.go (100%)

diff --git a/.gitignore b/.gitignore
index 1341491df..b0adb89f1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -178,3 +178,6 @@ docs/book/src/docs
 /testdata/**/go.sum
 /docs/book/src/simple-external-plugin-tutorial/testdata/sampleexternalplugin/v1/bin
 /testdata/**legacy**
+
+# skip operator's test for now
+operator/test
diff --git a/config/crd/bases/production-stack.vllm.ai_staticroutes.yaml b/config/crd/bases/production-stack.vllm.ai_staticroutes.yaml
deleted file mode 100644
index cd6dd48bd..000000000
--- a/config/crd/bases/production-stack.vllm.ai_staticroutes.yaml
+++ /dev/null
@@ -1,218 +0,0 @@
----
-apiVersion: apiextensions.k8s.io/v1
-kind: CustomResourceDefinition
-metadata:
-  annotations:
-    controller-gen.kubebuilder.io/version: v0.17.2
-  name: staticroutes.production-stack.vllm.ai
-spec:
-  group: production-stack.vllm.ai
-  names:
-    kind: StaticRoute
-    listKind: StaticRouteList
-    plural: staticroutes
-    singular: staticroute
-  scope: Namespaced
-  versions:
-  - name: v1alpha1
-    schema:
-      openAPIV3Schema:
-        description: StaticRoute is the Schema for the staticroutes API
-        properties:
-          apiVersion:
-            description: |-
-              APIVersion defines the versioned schema of this representation of an object.
-              Servers should convert recognized schemas to the latest internal value, and
-              may reject unrecognized values.
-              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
-            type: string
-          kind:
-            description: |-
-              Kind is a string value representing the REST resource this object represents.
-              Servers may infer this from the endpoint the client submits requests to.
-              Cannot be updated.
-              In CamelCase.
-              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
-            type: string
-          metadata:
-            type: object
-          spec:
-            description: StaticRouteSpec defines the desired state of StaticRoute
-            properties:
-              configMapName:
-                description: ConfigMapName is the name of the ConfigMap to create
-                  with the dynamic config
-                type: string
-              healthCheck:
-                description: HealthCheck defines the health check configuration for
-                  the router
-                properties:
-                  failureThreshold:
-                    default: 3
-                    description: Minimum consecutive failures for the probe to be
-                      considered failed
-                    format: int32
-                    minimum: 1
-                    type: integer
-                  periodSeconds:
-                    default: 10
-                    description: Number of seconds between probe attempts
-                    format: int32
-                    minimum: 1
-                    type: integer
-                  successThreshold:
-                    default: 1
-                    description: Minimum consecutive successes for the probe to be
-                      considered successful
-                    format: int32
-                    minimum: 1
-                    type: integer
-                  timeoutSeconds:
-                    default: 5
-                    description: Number of seconds after which the probe times out
-                    format: int32
-                    minimum: 1
-                    type: integer
-                type: object
-              routerRef:
-                description: RouterRef is a reference to the router service
-                properties:
-                  apiVersion:
-                    description: API version of the referent.
-                    type: string
-                  fieldPath:
-                    description: |-
-                      If referring to a piece of an object instead of an entire object, this string
-                      should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2].
-                      For example, if the object reference is to a container within a pod, this would take on a value like:
-                      "spec.containers{name}" (where "name" refers to the name of the container that triggered
-                      the event) or if no container name is specified "spec.containers[2]" (container with
-                      index 2 in this pod). This syntax is chosen only to have some well-defined way of
-                      referencing a part of an object.
-                    type: string
-                  kind:
-                    description: |-
-                      Kind of the referent.
-                      More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
-                    type: string
-                  name:
-                    description: |-
-                      Name of the referent.
-                      More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
-                    type: string
-                  namespace:
-                    description: |-
-                      Namespace of the referent.
-                      More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
-                    type: string
-                  resourceVersion:
-                    description: |-
-                      Specific resourceVersion to which this reference is made, if any.
-                      More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency
-                    type: string
-                  uid:
-                    description: |-
-                      UID of the referent.
-                      More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids
-                    type: string
-                type: object
-                x-kubernetes-map-type: atomic
-              routingLogic:
-                default: roundrobin
-                description: RoutingLogic specifies the routing logic to use
-                enum:
-                - roundrobin
-                type: string
-              serviceDiscovery:
-                default: static
-                description: ServiceDiscovery specifies the service discovery method
-                enum:
-                - static
-                type: string
-              staticBackends:
-                description: StaticBackends is a comma-separated list of backend URLs
-                type: string
-              staticModels:
-                description: StaticModels is a comma-separated list of model names
-                type: string
-            required:
-            - routingLogic
-            - serviceDiscovery
-            - staticBackends
-            - staticModels
-            type: object
-          status:
-            description: StaticRouteStatus defines the observed state of StaticRoute
-            properties:
-              conditions:
-                description: Conditions represent the latest available observations
-                  of the StaticRoute's state
-                items:
-                  description: Condition contains details for one aspect of the current
-                    state of this API Resource.
-                  properties:
-                    lastTransitionTime:
-                      description: |-
-                        lastTransitionTime is the last time the condition transitioned from one status to another.
-                        This should be when the underlying condition changed.  If that is not known, then using the time when the API field changed is acceptable.
-                      format: date-time
-                      type: string
-                    message:
-                      description: |-
-                        message is a human readable message indicating details about the transition.
-                        This may be an empty string.
-                      maxLength: 32768
-                      type: string
-                    observedGeneration:
-                      description: |-
-                        observedGeneration represents the .metadata.generation that the condition was set based upon.
-                        For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
-                        with respect to the current state of the instance.
-                      format: int64
-                      minimum: 0
-                      type: integer
-                    reason:
-                      description: |-
-                        reason contains a programmatic identifier indicating the reason for the condition's last transition.
-                        Producers of specific condition types may define expected values and meanings for this field,
-                        and whether the values are considered a guaranteed API.
-                        The value should be a CamelCase string.
-                        This field may not be empty.
-                      maxLength: 1024
-                      minLength: 1
-                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
-                      type: string
-                    status:
-                      description: status of the condition, one of True, False, Unknown.
-                      enum:
-                      - "True"
-                      - "False"
-                      - Unknown
-                      type: string
-                    type:
-                      description: type of condition in CamelCase or in foo.example.com/CamelCase.
-                      maxLength: 316
-                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
-                      type: string
-                  required:
-                  - lastTransitionTime
-                  - message
-                  - reason
-                  - status
-                  - type
-                  type: object
-                type: array
-              configMapRef:
-                description: ConfigMapRef is a reference to the created ConfigMap
-                type: string
-              lastAppliedTime:
-                description: LastAppliedTime is the last time the configuration was
-                  applied to the router
-                format: date-time
-                type: string
-            type: object
-        type: object
-    served: true
-    storage: true
-    subresources:
-      status: {}
diff --git a/.dockerignore b/operator/.dockerignore
similarity index 100%
rename from .dockerignore
rename to operator/.dockerignore
diff --git a/Dockerfile b/operator/Dockerfile
similarity index 100%
rename from Dockerfile
rename to operator/Dockerfile
diff --git a/Makefile b/operator/Makefile
similarity index 100%
rename from Makefile
rename to operator/Makefile
diff --git a/PROJECT b/operator/PROJECT
similarity index 100%
rename from PROJECT
rename to operator/PROJECT
diff --git a/api/v1alpha1/groupversion_info.go b/operator/api/v1alpha1/groupversion_info.go
similarity index 100%
rename from api/v1alpha1/groupversion_info.go
rename to operator/api/v1alpha1/groupversion_info.go
diff --git a/api/v1alpha1/vllmrouter_types.go b/operator/api/v1alpha1/vllmrouter_types.go
similarity index 100%
rename from api/v1alpha1/vllmrouter_types.go
rename to operator/api/v1alpha1/vllmrouter_types.go
diff --git a/api/v1alpha1/vllmruntime_types.go b/operator/api/v1alpha1/vllmruntime_types.go
similarity index 100%
rename from api/v1alpha1/vllmruntime_types.go
rename to operator/api/v1alpha1/vllmruntime_types.go
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/operator/api/v1alpha1/zz_generated.deepcopy.go
similarity index 100%
rename from api/v1alpha1/zz_generated.deepcopy.go
rename to operator/api/v1alpha1/zz_generated.deepcopy.go
diff --git a/cmd/main.go b/operator/cmd/main.go
similarity index 100%
rename from cmd/main.go
rename to operator/cmd/main.go
diff --git a/config/crd/bases/serving.vllm.ai_vllmrouters.yaml b/operator/config/crd/bases/serving.vllm.ai_vllmrouters.yaml
similarity index 100%
rename from config/crd/bases/serving.vllm.ai_vllmrouters.yaml
rename to operator/config/crd/bases/serving.vllm.ai_vllmrouters.yaml
diff --git a/config/crd/bases/serving.vllm.ai_vllmruntimes.yaml b/operator/config/crd/bases/serving.vllm.ai_vllmruntimes.yaml
similarity index 100%
rename from config/crd/bases/serving.vllm.ai_vllmruntimes.yaml
rename to operator/config/crd/bases/serving.vllm.ai_vllmruntimes.yaml
diff --git a/config/crd/kustomization.yaml b/operator/config/crd/kustomization.yaml
similarity index 100%
rename from config/crd/kustomization.yaml
rename to operator/config/crd/kustomization.yaml
diff --git a/config/crd/kustomizeconfig.yaml b/operator/config/crd/kustomizeconfig.yaml
similarity index 100%
rename from config/crd/kustomizeconfig.yaml
rename to operator/config/crd/kustomizeconfig.yaml
diff --git a/config/default/cert_metrics_manager_patch.yaml b/operator/config/default/cert_metrics_manager_patch.yaml
similarity index 100%
rename from config/default/cert_metrics_manager_patch.yaml
rename to operator/config/default/cert_metrics_manager_patch.yaml
diff --git a/config/default/kustomization.yaml b/operator/config/default/kustomization.yaml
similarity index 100%
rename from config/default/kustomization.yaml
rename to operator/config/default/kustomization.yaml
diff --git a/config/default/manager_metrics_patch.yaml b/operator/config/default/manager_metrics_patch.yaml
similarity index 100%
rename from config/default/manager_metrics_patch.yaml
rename to operator/config/default/manager_metrics_patch.yaml
diff --git a/config/default/metrics_service.yaml b/operator/config/default/metrics_service.yaml
similarity index 100%
rename from config/default/metrics_service.yaml
rename to operator/config/default/metrics_service.yaml
diff --git a/config/manager/deployment.yaml b/operator/config/manager/deployment.yaml
similarity index 100%
rename from config/manager/deployment.yaml
rename to operator/config/manager/deployment.yaml
diff --git a/config/manager/kustomization.yaml b/operator/config/manager/kustomization.yaml
similarity index 100%
rename from config/manager/kustomization.yaml
rename to operator/config/manager/kustomization.yaml
diff --git a/config/manager/namespace.yaml b/operator/config/manager/namespace.yaml
similarity index 100%
rename from config/manager/namespace.yaml
rename to operator/config/manager/namespace.yaml
diff --git a/config/network-policy/allow-metrics-traffic.yaml b/operator/config/network-policy/allow-metrics-traffic.yaml
similarity index 100%
rename from config/network-policy/allow-metrics-traffic.yaml
rename to operator/config/network-policy/allow-metrics-traffic.yaml
diff --git a/config/network-policy/kustomization.yaml b/operator/config/network-policy/kustomization.yaml
similarity index 100%
rename from config/network-policy/kustomization.yaml
rename to operator/config/network-policy/kustomization.yaml
diff --git a/config/prometheus/kustomization.yaml b/operator/config/prometheus/kustomization.yaml
similarity index 100%
rename from config/prometheus/kustomization.yaml
rename to operator/config/prometheus/kustomization.yaml
diff --git a/config/prometheus/monitor.yaml b/operator/config/prometheus/monitor.yaml
similarity index 100%
rename from config/prometheus/monitor.yaml
rename to operator/config/prometheus/monitor.yaml
diff --git a/config/prometheus/monitor_tls_patch.yaml b/operator/config/prometheus/monitor_tls_patch.yaml
similarity index 100%
rename from config/prometheus/monitor_tls_patch.yaml
rename to operator/config/prometheus/monitor_tls_patch.yaml
diff --git a/config/rbac/kustomization.yaml b/operator/config/rbac/kustomization.yaml
similarity index 100%
rename from config/rbac/kustomization.yaml
rename to operator/config/rbac/kustomization.yaml
diff --git a/config/rbac/leader_election_role.yaml b/operator/config/rbac/leader_election_role.yaml
similarity index 100%
rename from config/rbac/leader_election_role.yaml
rename to operator/config/rbac/leader_election_role.yaml
diff --git a/config/rbac/leader_election_role_binding.yaml b/operator/config/rbac/leader_election_role_binding.yaml
similarity index 100%
rename from config/rbac/leader_election_role_binding.yaml
rename to operator/config/rbac/leader_election_role_binding.yaml
diff --git a/config/rbac/metrics_auth_role.yaml b/operator/config/rbac/metrics_auth_role.yaml
similarity index 100%
rename from config/rbac/metrics_auth_role.yaml
rename to operator/config/rbac/metrics_auth_role.yaml
diff --git a/config/rbac/metrics_auth_role_binding.yaml b/operator/config/rbac/metrics_auth_role_binding.yaml
similarity index 100%
rename from config/rbac/metrics_auth_role_binding.yaml
rename to operator/config/rbac/metrics_auth_role_binding.yaml
diff --git a/config/rbac/metrics_reader_role.yaml b/operator/config/rbac/metrics_reader_role.yaml
similarity index 100%
rename from config/rbac/metrics_reader_role.yaml
rename to operator/config/rbac/metrics_reader_role.yaml
diff --git a/config/rbac/pod_viewer_role.yaml b/operator/config/rbac/pod_viewer_role.yaml
similarity index 100%
rename from config/rbac/pod_viewer_role.yaml
rename to operator/config/rbac/pod_viewer_role.yaml
diff --git a/config/rbac/role.yaml b/operator/config/rbac/role.yaml
similarity index 63%
rename from config/rbac/role.yaml
rename to operator/config/rbac/role.yaml
index 7109b302c..04f6f081a 100644
--- a/config/rbac/role.yaml
+++ b/operator/config/rbac/role.yaml
@@ -18,14 +18,6 @@ rules:
   - patch
   - update
   - watch
-- apiGroups:
-  - ""
-  resources:
-  - pods
-  verbs:
-  - get
-  - list
-  - watch
 - apiGroups:
   - apps
   resources:
@@ -38,32 +30,6 @@ rules:
   - patch
   - update
   - watch
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - staticroutes
-  verbs:
-  - create
-  - delete
-  - get
-  - list
-  - patch
-  - update
-  - watch
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - staticroutes/finalizers
-  verbs:
-  - update
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - staticroutes/status
-  verbs:
-  - get
-  - patch
-  - update
 - apiGroups:
   - serving.vllm.ai
   resources:
diff --git a/config/rbac/role_binding.yaml b/operator/config/rbac/role_binding.yaml
similarity index 100%
rename from config/rbac/role_binding.yaml
rename to operator/config/rbac/role_binding.yaml
diff --git a/config/rbac/service_account.yaml b/operator/config/rbac/service_account.yaml
similarity index 100%
rename from config/rbac/service_account.yaml
rename to operator/config/rbac/service_account.yaml
diff --git a/config/rbac/vllmrouter_admin_role.yaml b/operator/config/rbac/vllmrouter_admin_role.yaml
similarity index 100%
rename from config/rbac/vllmrouter_admin_role.yaml
rename to operator/config/rbac/vllmrouter_admin_role.yaml
diff --git a/config/rbac/vllmrouter_editor_role.yaml b/operator/config/rbac/vllmrouter_editor_role.yaml
similarity index 100%
rename from config/rbac/vllmrouter_editor_role.yaml
rename to operator/config/rbac/vllmrouter_editor_role.yaml
diff --git a/config/rbac/vllmrouter_role_binding.yaml b/operator/config/rbac/vllmrouter_role_binding.yaml
similarity index 100%
rename from config/rbac/vllmrouter_role_binding.yaml
rename to operator/config/rbac/vllmrouter_role_binding.yaml
diff --git a/config/rbac/vllmrouter_service_account.yaml b/operator/config/rbac/vllmrouter_service_account.yaml
similarity index 100%
rename from config/rbac/vllmrouter_service_account.yaml
rename to operator/config/rbac/vllmrouter_service_account.yaml
diff --git a/config/rbac/vllmrouter_viewer_role.yaml b/operator/config/rbac/vllmrouter_viewer_role.yaml
similarity index 100%
rename from config/rbac/vllmrouter_viewer_role.yaml
rename to operator/config/rbac/vllmrouter_viewer_role.yaml
diff --git a/config/rbac/vllmruntime_admin_role.yaml b/operator/config/rbac/vllmruntime_admin_role.yaml
similarity index 100%
rename from config/rbac/vllmruntime_admin_role.yaml
rename to operator/config/rbac/vllmruntime_admin_role.yaml
diff --git a/config/rbac/vllmruntime_editor_role.yaml b/operator/config/rbac/vllmruntime_editor_role.yaml
similarity index 100%
rename from config/rbac/vllmruntime_editor_role.yaml
rename to operator/config/rbac/vllmruntime_editor_role.yaml
diff --git a/config/rbac/vllmruntime_viewer_role.yaml b/operator/config/rbac/vllmruntime_viewer_role.yaml
similarity index 100%
rename from config/rbac/vllmruntime_viewer_role.yaml
rename to operator/config/rbac/vllmruntime_viewer_role.yaml
diff --git a/config/samples/kustomization.yaml b/operator/config/samples/kustomization.yaml
similarity index 100%
rename from config/samples/kustomization.yaml
rename to operator/config/samples/kustomization.yaml
diff --git a/config/samples/serving_v1alpha1_router.yaml b/operator/config/samples/serving_v1alpha1_router.yaml
similarity index 100%
rename from config/samples/serving_v1alpha1_router.yaml
rename to operator/config/samples/serving_v1alpha1_router.yaml
diff --git a/config/samples/serving_v1alpha1_vllmruntime.yaml b/operator/config/samples/serving_v1alpha1_vllmruntime.yaml
similarity index 100%
rename from config/samples/serving_v1alpha1_vllmruntime.yaml
rename to operator/config/samples/serving_v1alpha1_vllmruntime.yaml
diff --git a/go.mod b/operator/go.mod
similarity index 100%
rename from go.mod
rename to operator/go.mod
diff --git a/go.sum b/operator/go.sum
similarity index 100%
rename from go.sum
rename to operator/go.sum
diff --git a/hack/boilerplate.go.txt b/operator/hack/boilerplate.go.txt
similarity index 100%
rename from hack/boilerplate.go.txt
rename to operator/hack/boilerplate.go.txt
diff --git a/internal/controller/suite_test.go b/operator/internal/controller/suite_test.go
similarity index 100%
rename from internal/controller/suite_test.go
rename to operator/internal/controller/suite_test.go
diff --git a/internal/controller/vllmrouter_controller.go b/operator/internal/controller/vllmrouter_controller.go
similarity index 100%
rename from internal/controller/vllmrouter_controller.go
rename to operator/internal/controller/vllmrouter_controller.go
diff --git a/internal/controller/vllmrouter_controller_test.go b/operator/internal/controller/vllmrouter_controller_test.go
similarity index 100%
rename from internal/controller/vllmrouter_controller_test.go
rename to operator/internal/controller/vllmrouter_controller_test.go
diff --git a/internal/controller/vllmruntime_controller.go b/operator/internal/controller/vllmruntime_controller.go
similarity index 100%
rename from internal/controller/vllmruntime_controller.go
rename to operator/internal/controller/vllmruntime_controller.go
diff --git a/internal/controller/vllmruntime_controller_test.go b/operator/internal/controller/vllmruntime_controller_test.go
similarity index 100%
rename from internal/controller/vllmruntime_controller_test.go
rename to operator/internal/controller/vllmruntime_controller_test.go

From c402d55f63c5568c1e92bcacb0e194d7cfecb160 Mon Sep 17 00:00:00 2001
From: royyhuang <roy.y.huang@gmail.com>
Date: Fri, 9 May 2025 01:32:18 +0000
Subject: [PATCH 03/14] rename api group from serving.vllm.ai to
 production-stack.vllm.ai

Signed-off-by: royyhuang <roy.y.huang@gmail.com>
---
 operator/PROJECT                              |  6 ++---
 operator/api/v1alpha1/groupversion_info.go    |  6 ++---
 operator/cmd/main.go                          |  4 ++--
 ...production-stack.vllm.ai_vllmrouters.yaml} |  4 ++--
 ...roduction-stack.vllm.ai_vllmruntimes.yaml} |  4 ++--
 operator/config/crd/kustomization.yaml        |  4 ++--
 operator/config/rbac/role.yaml                |  6 ++---
 .../config/rbac/vllmrouter_admin_role.yaml    | 10 ++++----
 .../config/rbac/vllmrouter_editor_role.yaml   |  6 ++---
 .../config/rbac/vllmrouter_viewer_role.yaml   |  6 ++---
 .../config/rbac/vllmruntime_admin_role.yaml   |  6 ++---
 .../config/rbac/vllmruntime_editor_role.yaml  |  6 ++---
 .../config/rbac/vllmruntime_viewer_role.yaml  |  6 ++---
 operator/config/samples/kustomization.yaml    |  4 ++--
 .../samples/serving_v1alpha1_router.yaml      |  2 +-
 .../samples/serving_v1alpha1_vllmruntime.yaml |  2 +-
 .../controller/vllmrouter_controller.go       |  8 +++----
 .../controller/vllmrouter_controller_test.go  |  8 +++----
 .../controller/vllmruntime_controller.go      | 24 +++++++++----------
 .../controller/vllmruntime_controller_test.go |  8 +++----
 20 files changed, 65 insertions(+), 65 deletions(-)
 rename operator/config/crd/bases/{serving.vllm.ai_vllmrouters.yaml => production-stack.vllm.ai_vllmrouters.yaml} (99%)
 rename operator/config/crd/bases/{serving.vllm.ai_vllmruntimes.yaml => production-stack.vllm.ai_vllmruntimes.yaml} (98%)

diff --git a/operator/PROJECT b/operator/PROJECT
index 97a77c2eb..696ec22c5 100644
--- a/operator/PROJECT
+++ b/operator/PROJECT
@@ -13,7 +13,7 @@ resources:
     namespaced: true
   controller: true
   domain: vllm.ai
-  group: serving
+  group: production-stack
   kind: VLLMRuntime
   path: production-stack/api/v1alpha1
   version: v1alpha1
@@ -22,8 +22,8 @@ resources:
     namespaced: true
   controller: true
   domain: vllm.ai
-  group: serving
-  kind: Router
+  group: production-stack
+  kind: VLLMRouter
   path: production-stack/api/v1alpha1
   version: v1alpha1
 version: "3"
diff --git a/operator/api/v1alpha1/groupversion_info.go b/operator/api/v1alpha1/groupversion_info.go
index 9d3c2bf50..25304d68a 100644
--- a/operator/api/v1alpha1/groupversion_info.go
+++ b/operator/api/v1alpha1/groupversion_info.go
@@ -14,9 +14,9 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
-// Package v1alpha1 contains API Schema definitions for the serving v1alpha1 API group.
+// Package v1alpha1 contains API Schema definitions for the production-stack v1alpha1 API group.
 // +kubebuilder:object:generate=true
-// +groupName=serving.vllm.ai
+// +groupName=production-stack.vllm.ai
 package v1alpha1
 
 import (
@@ -26,7 +26,7 @@ import (
 
 var (
 	// GroupVersion is group version used to register these objects.
-	GroupVersion = schema.GroupVersion{Group: "serving.vllm.ai", Version: "v1alpha1"}
+	GroupVersion = schema.GroupVersion{Group: "production-stack.vllm.ai", Version: "v1alpha1"}
 
 	// SchemeBuilder is used to add go types to the GroupVersionKind scheme.
 	SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}
diff --git a/operator/cmd/main.go b/operator/cmd/main.go
index 94c41e44b..1a32cd700 100644
--- a/operator/cmd/main.go
+++ b/operator/cmd/main.go
@@ -37,7 +37,7 @@ import (
 	metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
 	"sigs.k8s.io/controller-runtime/pkg/webhook"
 
-	servingv1alpha1 "production-stack/api/v1alpha1"
+	productionstackv1alpha1 "production-stack/api/v1alpha1"
 	"production-stack/internal/controller"
 	// +kubebuilder:scaffold:imports
 )
@@ -50,7 +50,7 @@ var (
 func init() {
 	utilruntime.Must(clientgoscheme.AddToScheme(scheme))
 
-	utilruntime.Must(servingv1alpha1.AddToScheme(scheme))
+	utilruntime.Must(productionstackv1alpha1.AddToScheme(scheme))
 	// +kubebuilder:scaffold:scheme
 }
 
diff --git a/operator/config/crd/bases/serving.vllm.ai_vllmrouters.yaml b/operator/config/crd/bases/production-stack.vllm.ai_vllmrouters.yaml
similarity index 99%
rename from operator/config/crd/bases/serving.vllm.ai_vllmrouters.yaml
rename to operator/config/crd/bases/production-stack.vllm.ai_vllmrouters.yaml
index 7445c2e4c..3506964bc 100644
--- a/operator/config/crd/bases/serving.vllm.ai_vllmrouters.yaml
+++ b/operator/config/crd/bases/production-stack.vllm.ai_vllmrouters.yaml
@@ -4,9 +4,9 @@ kind: CustomResourceDefinition
 metadata:
   annotations:
     controller-gen.kubebuilder.io/version: v0.17.2
-  name: vllmrouters.serving.vllm.ai
+  name: vllmrouters.production-stack.vllm.ai
 spec:
-  group: serving.vllm.ai
+  group: production-stack.vllm.ai
   names:
     kind: VLLMRouter
     listKind: VLLMRouterList
diff --git a/operator/config/crd/bases/serving.vllm.ai_vllmruntimes.yaml b/operator/config/crd/bases/production-stack.vllm.ai_vllmruntimes.yaml
similarity index 98%
rename from operator/config/crd/bases/serving.vllm.ai_vllmruntimes.yaml
rename to operator/config/crd/bases/production-stack.vllm.ai_vllmruntimes.yaml
index 311dc82a8..c3f21d2ac 100644
--- a/operator/config/crd/bases/serving.vllm.ai_vllmruntimes.yaml
+++ b/operator/config/crd/bases/production-stack.vllm.ai_vllmruntimes.yaml
@@ -4,9 +4,9 @@ kind: CustomResourceDefinition
 metadata:
   annotations:
     controller-gen.kubebuilder.io/version: v0.17.2
-  name: vllmruntimes.serving.vllm.ai
+  name: vllmruntimes.production-stack.vllm.ai
 spec:
-  group: serving.vllm.ai
+  group: production-stack.vllm.ai
   names:
     kind: VLLMRuntime
     listKind: VLLMRuntimeList
diff --git a/operator/config/crd/kustomization.yaml b/operator/config/crd/kustomization.yaml
index 500f2b34a..b3158c4de 100644
--- a/operator/config/crd/kustomization.yaml
+++ b/operator/config/crd/kustomization.yaml
@@ -2,8 +2,8 @@
 # since it depends on service name and namespace that are out of this kustomize package.
 # It should be run by config/default
 resources:
-- bases/serving.vllm.ai_vllmruntimes.yaml
-- bases/serving.vllm.ai_vllmrouters.yaml
+- bases/production-stack.vllm.ai_vllmruntimes.yaml
+- bases/production-stack.vllm.ai_vllmrouters.yaml
 # +kubebuilder:scaffold:crdkustomizeresource
 
 patches:
diff --git a/operator/config/rbac/role.yaml b/operator/config/rbac/role.yaml
index 04f6f081a..941b757ec 100644
--- a/operator/config/rbac/role.yaml
+++ b/operator/config/rbac/role.yaml
@@ -31,7 +31,7 @@ rules:
   - update
   - watch
 - apiGroups:
-  - serving.vllm.ai
+  - production-stack.vllm.ai
   resources:
   - vllmrouters
   - vllmruntimes
@@ -44,14 +44,14 @@ rules:
   - update
   - watch
 - apiGroups:
-  - serving.vllm.ai
+  - production-stack.vllm.ai
   resources:
   - vllmrouters/finalizers
   - vllmruntimes/finalizers
   verbs:
   - update
 - apiGroups:
-  - serving.vllm.ai
+  - production-stack.vllm.ai
   resources:
   - vllmrouters/status
   - vllmruntimes/status
diff --git a/operator/config/rbac/vllmrouter_admin_role.yaml b/operator/config/rbac/vllmrouter_admin_role.yaml
index a42914d6b..0efbe62ae 100644
--- a/operator/config/rbac/vllmrouter_admin_role.yaml
+++ b/operator/config/rbac/vllmrouter_admin_role.yaml
@@ -1,7 +1,7 @@
 # This rule is not used by the project production-stack itself.
 # It is provided to allow the cluster admin to help manage permissions for users.
 #
-# Grants full permissions ('*') over serving.vllm.ai.
+# Grants full permissions ('*') over production-stack.vllm.ai.
 # This role is intended for users authorized to modify roles and bindings within the cluster,
 # enabling them to delegate specific permissions to other users or groups as needed.
 
@@ -14,14 +14,14 @@ metadata:
   name: vllmrouter-admin-role
 rules:
 - apiGroups:
-  - serving.vllm.ai
+  - production-stack.vllm.ai
   resources:
-  - routers
+  - vllmrouters
   verbs:
   - '*'
 - apiGroups:
-  - serving.vllm.ai
+  - production-stack.vllm.ai
   resources:
-  - routers/status
+  - vllmrouters/status
   verbs:
   - get
diff --git a/operator/config/rbac/vllmrouter_editor_role.yaml b/operator/config/rbac/vllmrouter_editor_role.yaml
index 0fbf83a34..4cef51a05 100644
--- a/operator/config/rbac/vllmrouter_editor_role.yaml
+++ b/operator/config/rbac/vllmrouter_editor_role.yaml
@@ -1,7 +1,7 @@
 # This rule is not used by the project production-stack itself.
 # It is provided to allow the cluster admin to help manage permissions for users.
 #
-# Grants permissions to create, update, and delete resources within the serving.vllm.ai.
+# Grants permissions to create, update, and delete resources within the production-stack.vllm.ai.
 # This role is intended for users who need to manage these resources
 # but should not control RBAC or manage permissions for others.
 
@@ -14,7 +14,7 @@ metadata:
   name: vllmrouter-editor-role
 rules:
 - apiGroups:
-  - serving.vllm.ai
+  - production-stack.vllm.ai
   resources:
   - vllmrouters
   verbs:
@@ -26,7 +26,7 @@ rules:
   - update
   - watch
 - apiGroups:
-  - serving.vllm.ai
+  - production-stack.vllm.ai
   resources:
   - vllmrouters/status
   verbs:
diff --git a/operator/config/rbac/vllmrouter_viewer_role.yaml b/operator/config/rbac/vllmrouter_viewer_role.yaml
index 5da0be3d5..4451ad09d 100644
--- a/operator/config/rbac/vllmrouter_viewer_role.yaml
+++ b/operator/config/rbac/vllmrouter_viewer_role.yaml
@@ -1,7 +1,7 @@
 # This rule is not used by the project production-stack itself.
 # It is provided to allow the cluster admin to help manage permissions for users.
 #
-# Grants read-only access to serving.vllm.ai resources.
+# Grants read-only access to production-stack.vllm.ai resources.
 # This role is intended for users who need visibility into these resources
 # without permissions to modify them. It is ideal for monitoring purposes and limited-access viewing.
 
@@ -14,7 +14,7 @@ metadata:
   name: vllmrouter-viewer-role
 rules:
 - apiGroups:
-  - serving.vllm.ai
+  - production-stack.vllm.ai
   resources:
   - vllmrouters
   verbs:
@@ -22,7 +22,7 @@ rules:
   - list
   - watch
 - apiGroups:
-  - serving.vllm.ai
+  - production-stack.vllm.ai
   resources:
   - vllmrouters/status
   verbs:
diff --git a/operator/config/rbac/vllmruntime_admin_role.yaml b/operator/config/rbac/vllmruntime_admin_role.yaml
index f765cb599..26e6444a9 100644
--- a/operator/config/rbac/vllmruntime_admin_role.yaml
+++ b/operator/config/rbac/vllmruntime_admin_role.yaml
@@ -1,7 +1,7 @@
 # This rule is not used by the project production-stack itself.
 # It is provided to allow the cluster admin to help manage permissions for users.
 #
-# Grants full permissions ('*') over serving.vllm.ai.
+# Grants full permissions ('*') over production-stack.vllm.ai.
 # This role is intended for users authorized to modify roles and bindings within the cluster,
 # enabling them to delegate specific permissions to other users or groups as needed.
 
@@ -14,13 +14,13 @@ metadata:
   name: vllmruntime-admin-role
 rules:
 - apiGroups:
-  - serving.vllm.ai
+  - production-stack.vllm.ai
   resources:
   - vllmruntimes
   verbs:
   - '*'
 - apiGroups:
-  - serving.vllm.ai
+  - production-stack.vllm.ai
   resources:
   - vllmruntimes/status
   verbs:
diff --git a/operator/config/rbac/vllmruntime_editor_role.yaml b/operator/config/rbac/vllmruntime_editor_role.yaml
index c62691af5..71a1972d2 100644
--- a/operator/config/rbac/vllmruntime_editor_role.yaml
+++ b/operator/config/rbac/vllmruntime_editor_role.yaml
@@ -1,7 +1,7 @@
 # This rule is not used by the project production-stack itself.
 # It is provided to allow the cluster admin to help manage permissions for users.
 #
-# Grants permissions to create, update, and delete resources within the serving.vllm.ai.
+# Grants permissions to create, update, and delete resources within the production-stack.vllm.ai.
 # This role is intended for users who need to manage these resources
 # but should not control RBAC or manage permissions for others.
 
@@ -14,7 +14,7 @@ metadata:
   name: vllmruntime-editor-role
 rules:
 - apiGroups:
-  - serving.vllm.ai
+  - production-stack.vllm.ai
   resources:
   - vllmruntimes
   verbs:
@@ -26,7 +26,7 @@ rules:
   - update
   - watch
 - apiGroups:
-  - serving.vllm.ai
+  - production-stack.vllm.ai
   resources:
   - vllmruntimes/status
   verbs:
diff --git a/operator/config/rbac/vllmruntime_viewer_role.yaml b/operator/config/rbac/vllmruntime_viewer_role.yaml
index 1314e4715..6e7719f13 100644
--- a/operator/config/rbac/vllmruntime_viewer_role.yaml
+++ b/operator/config/rbac/vllmruntime_viewer_role.yaml
@@ -1,7 +1,7 @@
 # This rule is not used by the project production-stack itself.
 # It is provided to allow the cluster admin to help manage permissions for users.
 #
-# Grants read-only access to serving.vllm.ai resources.
+# Grants read-only access to production-stack.vllm.ai resources.
 # This role is intended for users who need visibility into these resources
 # without permissions to modify them. It is ideal for monitoring purposes and limited-access viewing.
 
@@ -14,7 +14,7 @@ metadata:
   name: vllmruntime-viewer-role
 rules:
 - apiGroups:
-  - serving.vllm.ai
+  - production-stack.vllm.ai
   resources:
   - vllmruntimes
   verbs:
@@ -22,7 +22,7 @@ rules:
   - list
   - watch
 - apiGroups:
-  - serving.vllm.ai
+  - production-stack.vllm.ai
   resources:
   - vllmruntimes/status
   verbs:
diff --git a/operator/config/samples/kustomization.yaml b/operator/config/samples/kustomization.yaml
index 8a0f43239..3cd7d3881 100644
--- a/operator/config/samples/kustomization.yaml
+++ b/operator/config/samples/kustomization.yaml
@@ -3,6 +3,6 @@ kind: Kustomization
 
 ## Append samples of your project ##
 resources:
-- serving_v1alpha1_vllmruntime.yaml
-- serving_v1alpha1_router.yaml
+- production-stack_v1alpha_vllmruntime.yaml
+- production-stack_v1alpha_vllmrouter.yaml
 # +kubebuilder:scaffold:manifestskustomizesamples
diff --git a/operator/config/samples/serving_v1alpha1_router.yaml b/operator/config/samples/serving_v1alpha1_router.yaml
index 3400dace4..9d33d0329 100644
--- a/operator/config/samples/serving_v1alpha1_router.yaml
+++ b/operator/config/samples/serving_v1alpha1_router.yaml
@@ -1,4 +1,4 @@
-apiVersion: serving.vllm.ai/v1alpha1
+apiVersion: production-stack.vllm.ai/v1alpha1
 kind: VLLMRouter
 metadata:
   labels:
diff --git a/operator/config/samples/serving_v1alpha1_vllmruntime.yaml b/operator/config/samples/serving_v1alpha1_vllmruntime.yaml
index 3730c482f..1804900cb 100644
--- a/operator/config/samples/serving_v1alpha1_vllmruntime.yaml
+++ b/operator/config/samples/serving_v1alpha1_vllmruntime.yaml
@@ -1,4 +1,4 @@
-apiVersion: serving.vllm.ai/v1alpha1
+apiVersion: production-stack.vllm.ai/v1alpha1
 kind: VLLMRuntime
 metadata:
   labels:
diff --git a/operator/internal/controller/vllmrouter_controller.go b/operator/internal/controller/vllmrouter_controller.go
index 002f9cddb..6fc385a8a 100644
--- a/operator/internal/controller/vllmrouter_controller.go
+++ b/operator/internal/controller/vllmrouter_controller.go
@@ -42,10 +42,10 @@ type VLLMRouterReconciler struct {
 	Scheme *runtime.Scheme
 }
 
-// +kubebuilder:rbac:groups=serving.vllm.ai,resources=vllmrouters,verbs=get;list;watch;create;update;patch;delete
-// +kubebuilder:rbac:groups=serving.vllm.ai,resources=vllmrouters/status,verbs=get;update;patch
-// +kubebuilder:rbac:groups=serving.vllm.ai,resources=vllmrouters/finalizers,verbs=update
-// +kubebuilder:rbac:groups=serving.vllm.ai,resources=vllmruntimes,verbs=get;list;watch
+// +kubebuilder:rbac:groups=production-stack.vllm.ai,resources=vllmrouters,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=production-stack.vllm.ai,resources=vllmrouters/status,verbs=get;update;patch
+// +kubebuilder:rbac:groups=production-stack.vllm.ai,resources=vllmrouters/finalizers,verbs=update
+// +kubebuilder:rbac:groups=production-stack.vllm.ai,resources=vllmruntimes,verbs=get;list;watch
 // +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update;patch;delete
diff --git a/operator/internal/controller/vllmrouter_controller_test.go b/operator/internal/controller/vllmrouter_controller_test.go
index fe68a9f5c..1642ad5f8 100644
--- a/operator/internal/controller/vllmrouter_controller_test.go
+++ b/operator/internal/controller/vllmrouter_controller_test.go
@@ -27,7 +27,7 @@ import (
 
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 
-	servingv1alpha1 "production-stack/api/v1alpha1"
+	productionstackv1alpha1 "production-stack/api/v1alpha1"
 )
 
 var _ = Describe("VLLMRouter Controller", func() {
@@ -40,13 +40,13 @@ var _ = Describe("VLLMRouter Controller", func() {
 			Name:      resourceName,
 			Namespace: "default", // TODO(user):Modify as needed
 		}
-		router := &servingv1alpha1.VLLMRouter{}
+		router := &productionstackv1alpha1.VLLMRouter{}
 
 		BeforeEach(func() {
 			By("creating the custom resource for the Kind VLLMRouter")
 			err := k8sClient.Get(ctx, typeNamespacedName, router)
 			if err != nil && errors.IsNotFound(err) {
-				resource := &servingv1alpha1.VLLMRouter{
+				resource := &productionstackv1alpha1.VLLMRouter{
 					ObjectMeta: metav1.ObjectMeta{
 						Name:      resourceName,
 						Namespace: "default",
@@ -59,7 +59,7 @@ var _ = Describe("VLLMRouter Controller", func() {
 
 		AfterEach(func() {
 			// TODO(user): Cleanup logic after each test, like removing the resource instance.
-			resource := &servingv1alpha1.VLLMRouter{}
+			resource := &productionstackv1alpha1.VLLMRouter{}
 			err := k8sClient.Get(ctx, typeNamespacedName, resource)
 			Expect(err).NotTo(HaveOccurred())
 
diff --git a/operator/internal/controller/vllmruntime_controller.go b/operator/internal/controller/vllmruntime_controller.go
index 97cb59885..b6f15f075 100644
--- a/operator/internal/controller/vllmruntime_controller.go
+++ b/operator/internal/controller/vllmruntime_controller.go
@@ -33,7 +33,7 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/log"
 
-	servingv1alpha1 "production-stack/api/v1alpha1"
+	productionstackv1alpha1 "production-stack/api/v1alpha1"
 )
 
 // VLLMRuntimeReconciler reconciles a VLLMRuntime object
@@ -42,9 +42,9 @@ type VLLMRuntimeReconciler struct {
 	Scheme *runtime.Scheme
 }
 
-// +kubebuilder:rbac:groups=serving.vllm.ai,resources=vllmruntimes,verbs=get;list;watch;create;update;patch;delete
-// +kubebuilder:rbac:groups=serving.vllm.ai,resources=vllmruntimes/status,verbs=get;update;patch
-// +kubebuilder:rbac:groups=serving.vllm.ai,resources=vllmruntimes/finalizers,verbs=update
+// +kubebuilder:rbac:groups=production-stack.vllm.ai,resources=vllmruntimes,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=production-stack.vllm.ai,resources=vllmruntimes/status,verbs=get;update;patch
+// +kubebuilder:rbac:groups=production-stack.vllm.ai,resources=vllmruntimes/finalizers,verbs=update
 // +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update;patch;delete
@@ -56,7 +56,7 @@ func (r *VLLMRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 	log := log.FromContext(ctx)
 
 	// Fetch the VLLMRuntime instance
-	vllmRuntime := &servingv1alpha1.VLLMRuntime{}
+	vllmRuntime := &productionstackv1alpha1.VLLMRuntime{}
 	err := r.Get(ctx, req.NamespacedName, vllmRuntime)
 	if err != nil {
 		if errors.IsNotFound(err) {
@@ -148,7 +148,7 @@ func (r *VLLMRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 }
 
 // deploymentForVLLMRuntime returns a VLLMRuntime Deployment object
-func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *servingv1alpha1.VLLMRuntime) *appsv1.Deployment {
+func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *productionstackv1alpha1.VLLMRuntime) *appsv1.Deployment {
 	labels := map[string]string{
 		"app": vllmRuntime.Name,
 	}
@@ -395,7 +395,7 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *servingv1a
 }
 
 // deploymentNeedsUpdate checks if the deployment needs to be updated
-func (r *VLLMRuntimeReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, vr *servingv1alpha1.VLLMRuntime) bool {
+func (r *VLLMRuntimeReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, vr *productionstackv1alpha1.VLLMRuntime) bool {
 	// Generate the expected deployment
 	expectedDep := r.deploymentForVLLMRuntime(vr)
 
@@ -460,9 +460,9 @@ func (r *VLLMRuntimeReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, vr
 }
 
 // updateStatus updates the status of the VLLMRuntime
-func (r *VLLMRuntimeReconciler) updateStatus(ctx context.Context, vr *servingv1alpha1.VLLMRuntime, dep *appsv1.Deployment) error {
+func (r *VLLMRuntimeReconciler) updateStatus(ctx context.Context, vr *productionstackv1alpha1.VLLMRuntime, dep *appsv1.Deployment) error {
 	// Re-read the VLLMRuntime to get the latest version
-	latestVR := &servingv1alpha1.VLLMRuntime{}
+	latestVR := &productionstackv1alpha1.VLLMRuntime{}
 	if err := r.Get(ctx, types.NamespacedName{Name: vr.Name, Namespace: vr.Namespace}, latestVR); err != nil {
 		return err
 	}
@@ -483,7 +483,7 @@ func (r *VLLMRuntimeReconciler) updateStatus(ctx context.Context, vr *servingv1a
 }
 
 // serviceForVLLMRuntime returns a VLLMRuntime Service object
-func (r *VLLMRuntimeReconciler) serviceForVLLMRuntime(vllmRuntime *servingv1alpha1.VLLMRuntime) *corev1.Service {
+func (r *VLLMRuntimeReconciler) serviceForVLLMRuntime(vllmRuntime *productionstackv1alpha1.VLLMRuntime) *corev1.Service {
 	labels := map[string]string{
 		"app": vllmRuntime.Name,
 	}
@@ -513,7 +513,7 @@ func (r *VLLMRuntimeReconciler) serviceForVLLMRuntime(vllmRuntime *servingv1alph
 }
 
 // serviceNeedsUpdate checks if the service needs to be updated
-func (r *VLLMRuntimeReconciler) serviceNeedsUpdate(svc *corev1.Service, vr *servingv1alpha1.VLLMRuntime) bool {
+func (r *VLLMRuntimeReconciler) serviceNeedsUpdate(svc *corev1.Service, vr *productionstackv1alpha1.VLLMRuntime) bool {
 	// Compare target port
 	expectedTargetPort := int(vr.Spec.Port)
 	actualTargetPort := svc.Spec.Ports[0].TargetPort.IntValue()
@@ -527,7 +527,7 @@ func (r *VLLMRuntimeReconciler) serviceNeedsUpdate(svc *corev1.Service, vr *serv
 // SetupWithManager sets up the controller with the Manager.
 func (r *VLLMRuntimeReconciler) SetupWithManager(mgr ctrl.Manager) error {
 	return ctrl.NewControllerManagedBy(mgr).
-		For(&servingv1alpha1.VLLMRuntime{}).
+		For(&productionstackv1alpha1.VLLMRuntime{}).
 		Owns(&appsv1.Deployment{}).
 		Owns(&corev1.Service{}).
 		Complete(r)
diff --git a/operator/internal/controller/vllmruntime_controller_test.go b/operator/internal/controller/vllmruntime_controller_test.go
index 9f07fc776..f808ada78 100644
--- a/operator/internal/controller/vllmruntime_controller_test.go
+++ b/operator/internal/controller/vllmruntime_controller_test.go
@@ -27,7 +27,7 @@ import (
 
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 
-	servingv1alpha1 "production-stack/api/v1alpha1"
+	productionstackv1alpha1 "production-stack/api/v1alpha1"
 )
 
 var _ = Describe("VLLMRuntime Controller", func() {
@@ -40,13 +40,13 @@ var _ = Describe("VLLMRuntime Controller", func() {
 			Name:      resourceName,
 			Namespace: "default", // TODO(user):Modify as needed
 		}
-		vllmruntime := &servingv1alpha1.VLLMRuntime{}
+		vllmruntime := &productionstackv1alpha1.VLLMRuntime{}
 
 		BeforeEach(func() {
 			By("creating the custom resource for the Kind VLLMRuntime")
 			err := k8sClient.Get(ctx, typeNamespacedName, vllmruntime)
 			if err != nil && errors.IsNotFound(err) {
-				resource := &servingv1alpha1.VLLMRuntime{
+				resource := &productionstackv1alpha1.VLLMRuntime{
 					ObjectMeta: metav1.ObjectMeta{
 						Name:      resourceName,
 						Namespace: "default",
@@ -59,7 +59,7 @@ var _ = Describe("VLLMRuntime Controller", func() {
 
 		AfterEach(func() {
 			// TODO(user): Cleanup logic after each test, like removing the resource instance.
-			resource := &servingv1alpha1.VLLMRuntime{}
+			resource := &productionstackv1alpha1.VLLMRuntime{}
 			err := k8sClient.Get(ctx, typeNamespacedName, resource)
 			Expect(err).NotTo(HaveOccurred())
 

From 55aad79a4c2e1d224671e6f69de66801f0ff1336 Mon Sep 17 00:00:00 2001
From: royyhuang <roy.y.huang@gmail.com>
Date: Fri, 9 May 2025 03:31:04 +0000
Subject: [PATCH 04/14] enable lmcache cpu offloading

Signed-off-by: royyhuang <roy.y.huang@gmail.com>
---
 .codespell-ignore                             |   2 +
 .gitignore                                    |   3 -
 operator/api/v1alpha1/vllmrouter_types.go     |   4 +-
 .../production-stack.vllm.ai_vllmrouters.yaml |  14 +-
 ... production-stack_v1alpha_vllmrouter.yaml} |   4 +-
 ...production-stack_v1alpha_vllmruntime.yaml} |   6 +-
 operator/go.mod                               |   2 +-
 operator/internal/controller/suite_test.go    |   4 +-
 .../controller/vllmrouter_controller.go       |  44 ++-
 .../controller/vllmruntime_controller.go      |  59 +++-
 operator/test/e2e/e2e_suite_test.go           |  89 +++++
 operator/test/e2e/e2e_test.go                 | 329 ++++++++++++++++++
 operator/test/utils/utils.go                  | 251 +++++++++++++
 13 files changed, 755 insertions(+), 56 deletions(-)
 rename operator/config/samples/{serving_v1alpha1_router.yaml => production-stack_v1alpha_vllmrouter.yaml} (95%)
 rename operator/config/samples/{serving_v1alpha1_vllmruntime.yaml => production-stack_v1alpha_vllmruntime.yaml} (93%)
 create mode 100644 operator/test/e2e/e2e_suite_test.go
 create mode 100644 operator/test/e2e/e2e_test.go
 create mode 100644 operator/test/utils/utils.go

diff --git a/.codespell-ignore b/.codespell-ignore
index bd1259c69..d12a466dc 100644
--- a/.codespell-ignore
+++ b/.codespell-ignore
@@ -1,2 +1,4 @@
 AKS
 aks
+NotIn
+AfterAll
diff --git a/.gitignore b/.gitignore
index b0adb89f1..1341491df 100644
--- a/.gitignore
+++ b/.gitignore
@@ -178,6 +178,3 @@ docs/book/src/docs
 /testdata/**/go.sum
 /docs/book/src/simple-external-plugin-tutorial/testdata/sampleexternalplugin/v1/bin
 /testdata/**legacy**
-
-# skip operator's test for now
-operator/test
diff --git a/operator/api/v1alpha1/vllmrouter_types.go b/operator/api/v1alpha1/vllmrouter_types.go
index 2bf632949..446ee10b6 100644
--- a/operator/api/v1alpha1/vllmrouter_types.go
+++ b/operator/api/v1alpha1/vllmrouter_types.go
@@ -58,10 +58,10 @@ type VLLMRouterSpec struct {
 	SessionKey string `json:"sessionKey,omitempty"`
 
 	// EngineScrapeInterval for collecting engine statistics
-	EngineScrapeInterval string `json:"engineScrapeInterval,omitempty"`
+	EngineScrapeInterval int32 `json:"engineScrapeInterval,omitempty"`
 
 	// RequestStatsWindow for request statistics
-	RequestStatsWindow string `json:"requestStatsWindow,omitempty"`
+	RequestStatsWindow int32 `json:"requestStatsWindow,omitempty"`
 
 	// ExtraArgs for additional router arguments
 	ExtraArgs []string `json:"extraArgs,omitempty"`
diff --git a/operator/config/crd/bases/production-stack.vllm.ai_vllmrouters.yaml b/operator/config/crd/bases/production-stack.vllm.ai_vllmrouters.yaml
index 3506964bc..8668380c3 100644
--- a/operator/config/crd/bases/production-stack.vllm.ai_vllmrouters.yaml
+++ b/operator/config/crd/bases/production-stack.vllm.ai_vllmrouters.yaml
@@ -45,7 +45,8 @@ spec:
                 type: boolean
               engineScrapeInterval:
                 description: EngineScrapeInterval for collecting engine statistics
-                type: string
+                format: int32
+                type: integer
               env:
                 description: Environment variables
                 items:
@@ -102,11 +103,11 @@ spec:
                           operator:
                             description: |-
                               Represents a key's relationship to a set of values.
-                              Valid operators are In, Not In, Exists, DoesNotExist. Gt, and Lt.
+                              Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt.
                             type: string
                           values:
                             description: |-
-                              An array of string values. If the operator is In or Not In,
+                              An array of string values. If the operator is In or NotIn,
                               the values array must be non-empty. If the operator is Exists or DoesNotExist,
                               the values array must be empty. If the operator is Gt or Lt, the values
                               array must have a single element, which will be interpreted as an integer.
@@ -135,11 +136,11 @@ spec:
                           operator:
                             description: |-
                               Represents a key's relationship to a set of values.
-                              Valid operators are In, Not In, Exists, DoesNotExist. Gt, and Lt.
+                              Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt.
                             type: string
                           values:
                             description: |-
-                              An array of string values. If the operator is In or Not In,
+                              An array of string values. If the operator is In or NotIn,
                               the values array must be non-empty. If the operator is Exists or DoesNotExist,
                               the values array must be empty. If the operator is Gt or Lt, the values
                               array must have a single element, which will be interpreted as an integer.
@@ -169,7 +170,8 @@ spec:
                 type: integer
               requestStatsWindow:
                 description: RequestStatsWindow for request statistics
-                type: string
+                format: int32
+                type: integer
               resources:
                 description: Resource requirements
                 properties:
diff --git a/operator/config/samples/serving_v1alpha1_router.yaml b/operator/config/samples/production-stack_v1alpha_vllmrouter.yaml
similarity index 95%
rename from operator/config/samples/serving_v1alpha1_router.yaml
rename to operator/config/samples/production-stack_v1alpha_vllmrouter.yaml
index 9d33d0329..c774472dd 100644
--- a/operator/config/samples/serving_v1alpha1_router.yaml
+++ b/operator/config/samples/production-stack_v1alpha_vllmrouter.yaml
@@ -19,10 +19,10 @@ spec:
   routingLogic: roundrobin
 
   # Engine statistics collection interval
-  engineScrapeInterval: "30"
+  engineScrapeInterval: 30
 
   # Request statistics window
-  requestStatsWindow: "60"
+  requestStatsWindow: 60
 
   # Container port for the router service
   port: 80
diff --git a/operator/config/samples/serving_v1alpha1_vllmruntime.yaml b/operator/config/samples/production-stack_v1alpha_vllmruntime.yaml
similarity index 93%
rename from operator/config/samples/serving_v1alpha1_vllmruntime.yaml
rename to operator/config/samples/production-stack_v1alpha_vllmruntime.yaml
index 1804900cb..0e13adbdc 100644
--- a/operator/config/samples/serving_v1alpha1_vllmruntime.yaml
+++ b/operator/config/samples/production-stack_v1alpha_vllmruntime.yaml
@@ -14,13 +14,13 @@ spec:
   gpuMemoryUtilization: "0.8"
   maxLoras: 4
   extraArgs: ["--disable-log-requests"]
-  v1: false
+  v1: true
 
   # LM Cache configuration
   lmCacheConfig:
     enabled: true
-    cpuOffloadingBufferSize: "4Gi"
-    diskOffloadingBufferSize: "8Gi"
+    cpuOffloadingBufferSize: "15"
+    diskOffloadingBufferSize: "8"
     remoteUrl: ""
     remoteSerde: ""
 
diff --git a/operator/go.mod b/operator/go.mod
index be7288dec..34589b9a0 100644
--- a/operator/go.mod
+++ b/operator/go.mod
@@ -10,6 +10,7 @@ require (
 	k8s.io/apimachinery v0.33.0
 	k8s.io/client-go v0.33.0
 	sigs.k8s.io/controller-runtime v0.20.4
+	k8s.io/api v0.33.0
 )
 
 require (
@@ -85,7 +86,6 @@ require (
 	gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
 	gopkg.in/inf.v0 v0.9.1 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
-	k8s.io/api v0.33.0 // indirect
 	k8s.io/apiextensions-apiserver v0.32.1 // indirect
 	k8s.io/apiserver v0.32.1 // indirect
 	k8s.io/component-base v0.32.1 // indirect
diff --git a/operator/internal/controller/suite_test.go b/operator/internal/controller/suite_test.go
index 13578d9d9..5cbcdfe09 100644
--- a/operator/internal/controller/suite_test.go
+++ b/operator/internal/controller/suite_test.go
@@ -32,7 +32,7 @@ import (
 	logf "sigs.k8s.io/controller-runtime/pkg/log"
 	"sigs.k8s.io/controller-runtime/pkg/log/zap"
 
-	servingv1alpha1 "production-stack/api/v1alpha1"
+	productionstackv1alpha1 "production-stack/api/v1alpha1"
 	// +kubebuilder:scaffold:imports
 )
 
@@ -71,7 +71,7 @@ var _ = BeforeSuite(func() {
 	Expect(err).NotTo(HaveOccurred())
 	Expect(cfg).NotTo(BeNil())
 
-	err = servingv1alpha1.AddToScheme(scheme.Scheme)
+	err = productionstackv1alpha1.AddToScheme(scheme.Scheme)
 	Expect(err).NotTo(HaveOccurred())
 
 	// +kubebuilder:scaffold:scheme
diff --git a/operator/internal/controller/vllmrouter_controller.go b/operator/internal/controller/vllmrouter_controller.go
index 6fc385a8a..dc99ee433 100644
--- a/operator/internal/controller/vllmrouter_controller.go
+++ b/operator/internal/controller/vllmrouter_controller.go
@@ -29,6 +29,7 @@ import (
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/intstr"
+	"k8s.io/client-go/util/retry"
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/log"
@@ -226,11 +227,11 @@ func (r *VLLMRouterReconciler) deploymentForVLLMRouter(router *servingv1alpha1.V
 	if router.Spec.SessionKey != "" {
 		args = append(args, "--session-key", router.Spec.SessionKey)
 	}
-	if router.Spec.EngineScrapeInterval != "" {
-		args = append(args, "--engine-stats-interval", router.Spec.EngineScrapeInterval)
+	if router.Spec.EngineScrapeInterval != 0 {
+		args = append(args, "--engine-stats-interval", fmt.Sprintf("%d", router.Spec.EngineScrapeInterval))
 	}
-	if router.Spec.RequestStatsWindow != "" {
-		args = append(args, "--request-stats-window", router.Spec.RequestStatsWindow)
+	if router.Spec.RequestStatsWindow != 0 {
+		args = append(args, "--request-stats-window", fmt.Sprintf("%d", router.Spec.RequestStatsWindow))
 	}
 	if router.Spec.ExtraArgs != nil {
 		args = append(args, router.Spec.ExtraArgs...)
@@ -323,24 +324,29 @@ func (r *VLLMRouterReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, rou
 
 // updateStatus updates the status of the VLLMRouter
 func (r *VLLMRouterReconciler) updateStatus(ctx context.Context, router *servingv1alpha1.VLLMRouter, dep *appsv1.Deployment) error {
-	// Re-read the VLLMRouter to get the latest version
-	latestRouter := &servingv1alpha1.VLLMRouter{}
-	if err := r.Get(ctx, types.NamespacedName{Name: router.Name, Namespace: router.Namespace}, latestRouter); err != nil {
-		return err
-	}
+	return retry.OnError(retry.DefaultRetry, func(err error) bool {
+		return errors.IsConflict(err)
+	}, func() error {
+		// Get the latest version of the VLLMRouter
+		latestRouter := &servingv1alpha1.VLLMRouter{}
+		if err := r.Get(ctx, types.NamespacedName{Name: router.Name, Namespace: router.Namespace}, latestRouter); err != nil {
+			return err
+		}
 
-	latestRouter.Status.LastUpdated = metav1.Now()
+		// Update the status fields
+		latestRouter.Status.LastUpdated = metav1.Now()
 
-	// Update VLLMRouter status based on deployment status
-	if dep.Status.AvailableReplicas > 0 {
-		latestRouter.Status.Status = "Ready"
-	} else if dep.Status.UpdatedReplicas > 0 {
-		latestRouter.Status.Status = "Updating"
-	} else {
-		latestRouter.Status.Status = "NotReady"
-	}
+		// Update VLLMRouter status based on deployment status
+		if dep.Status.AvailableReplicas > 0 {
+			latestRouter.Status.Status = "Ready"
+		} else if dep.Status.UpdatedReplicas > 0 {
+			latestRouter.Status.Status = "Updating"
+		} else {
+			latestRouter.Status.Status = "NotReady"
+		}
 
-	return r.Status().Update(ctx, latestRouter)
+		return r.Status().Update(ctx, latestRouter)
+	})
 }
 
 // serviceForVLLMRouter returns a VLLMRouter Service object
diff --git a/operator/internal/controller/vllmruntime_controller.go b/operator/internal/controller/vllmruntime_controller.go
index b6f15f075..a4937fc59 100644
--- a/operator/internal/controller/vllmruntime_controller.go
+++ b/operator/internal/controller/vllmruntime_controller.go
@@ -29,6 +29,7 @@ import (
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/intstr"
+	"k8s.io/client-go/util/retry"
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/log"
@@ -246,6 +247,15 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production
 			},
 		)
 
+		// Add KV transfer config based on V1 flag
+		var lmcache_config string
+		if vllmRuntime.Spec.V1 {
+			lmcache_config = `{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_both"}`
+		} else {
+			lmcache_config = `{"kv_connector":"LMCacheConnector","kv_role":"kv_both"}`
+		}
+		args = append(args, "--kv-transfer-config", lmcache_config)
+
 		if vllmRuntime.Spec.LMCacheConfig.CPUOffloadingBufferSize != "" {
 			env = append(env,
 				corev1.EnvVar{
@@ -437,6 +447,8 @@ func (r *VLLMRuntimeReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, vr
 	actualEnabled := false
 	actualCPUOffloadingBufferSize := ""
 	actualDiskOffloadingBufferSize := ""
+	actualRemoteURL := ""
+	actualRemoteSerde := ""
 
 	for _, env := range actualLMCacheConfig {
 		switch env.Name {
@@ -446,13 +458,19 @@ func (r *VLLMRuntimeReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, vr
 			actualCPUOffloadingBufferSize = env.Value
 		case "LMCACHE_MAX_LOCAL_DISK_SIZE":
 			actualDiskOffloadingBufferSize = env.Value
+		case "LMCACHE_REMOTE_URL":
+			actualRemoteURL = env.Value
+		case "LMCACHE_REMOTE_SERDE":
+			actualRemoteSerde = env.Value
 		}
 	}
 
 	// Compare specific fields
 	if expectedLMCacheConfig.Enabled != actualEnabled ||
 		expectedLMCacheConfig.CPUOffloadingBufferSize != actualCPUOffloadingBufferSize ||
-		expectedLMCacheConfig.DiskOffloadingBufferSize != actualDiskOffloadingBufferSize {
+		expectedLMCacheConfig.DiskOffloadingBufferSize != actualDiskOffloadingBufferSize ||
+		expectedLMCacheConfig.RemoteURL != actualRemoteURL ||
+		expectedLMCacheConfig.RemoteSerde != actualRemoteSerde {
 		return true
 	}
 
@@ -461,25 +479,30 @@ func (r *VLLMRuntimeReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, vr
 
 // updateStatus updates the status of the VLLMRuntime
 func (r *VLLMRuntimeReconciler) updateStatus(ctx context.Context, vr *productionstackv1alpha1.VLLMRuntime, dep *appsv1.Deployment) error {
-	// Re-read the VLLMRuntime to get the latest version
-	latestVR := &productionstackv1alpha1.VLLMRuntime{}
-	if err := r.Get(ctx, types.NamespacedName{Name: vr.Name, Namespace: vr.Namespace}, latestVR); err != nil {
-		return err
-	}
-
-	latestVR.Status.LastUpdated = metav1.Now()
+	return retry.OnError(retry.DefaultRetry, func(err error) bool {
+		return errors.IsConflict(err)
+	}, func() error {
+		// Get the latest version of the VLLMRuntime
+		latestVR := &productionstackv1alpha1.VLLMRuntime{}
+		if err := r.Get(ctx, types.NamespacedName{Name: vr.Name, Namespace: vr.Namespace}, latestVR); err != nil {
+			return err
+		}
 
-	// Update model status based on deployment status
-	if dep.Status.AvailableReplicas > 0 {
-		latestVR.Status.ModelStatus = "Ready"
-	} else if dep.Status.UpdatedReplicas > 0 {
-		// If we have updated replicas but they're not yet available, mark as updating
-		latestVR.Status.ModelStatus = "Updating"
-	} else {
-		latestVR.Status.ModelStatus = "NotReady"
-	}
+		// Update the status fields
+		latestVR.Status.LastUpdated = metav1.Now()
+
+		// Update model status based on deployment status
+		if dep.Status.AvailableReplicas > 0 {
+			latestVR.Status.ModelStatus = "Ready"
+		} else if dep.Status.UpdatedReplicas > 0 {
+			// If we have updated replicas but they're not yet available, mark as updating
+			latestVR.Status.ModelStatus = "Updating"
+		} else {
+			latestVR.Status.ModelStatus = "NotReady"
+		}
 
-	return r.Status().Update(ctx, latestVR)
+		return r.Status().Update(ctx, latestVR)
+	})
 }
 
 // serviceForVLLMRuntime returns a VLLMRuntime Service object
diff --git a/operator/test/e2e/e2e_suite_test.go b/operator/test/e2e/e2e_suite_test.go
new file mode 100644
index 000000000..25caaab2a
--- /dev/null
+++ b/operator/test/e2e/e2e_suite_test.go
@@ -0,0 +1,89 @@
+/*
+Copyright 2025.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package e2e
+
+import (
+	"fmt"
+	"os"
+	"os/exec"
+	"testing"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+
+	"production-stack/test/utils"
+)
+
+var (
+	// Optional Environment Variables:
+	// - CERT_MANAGER_INSTALL_SKIP=true: Skips CertManager installation during test setup.
+	// This variable is useful if CertManager is already installed, avoiding
+	// re-installation and conflicts.
+	skipCertManagerInstall = os.Getenv("CERT_MANAGER_INSTALL_SKIP") == "true"
+	// isCertManagerAlreadyInstalled is set to true when CertManager CRDs are found on the cluster
+	isCertManagerAlreadyInstalled = false
+
+	// projectImage is the name of the image which will be built and loaded
+	// with the source code changes to be tested.
+	projectImage = "example.com/production-stack:v0.0.1"
+)
+
+// TestE2E runs the end-to-end (e2e) test suite for the project. These tests execute in an isolated,
+// temporary environment to validate project changes, with the purpose of being used in CI jobs.
+// The default setup requires Kind, builds/loads the Manager Docker image locally, and installs
+// CertManager.
+func TestE2E(t *testing.T) {
+	RegisterFailHandler(Fail)
+	_, _ = fmt.Fprintf(GinkgoWriter, "Starting production-stack integration test suite\n")
+	RunSpecs(t, "e2e suite")
+}
+
+var _ = BeforeSuite(func() {
+	By("building the manager(Operator) image")
+	cmd := exec.Command("make", "docker-build", fmt.Sprintf("IMG=%s", projectImage))
+	_, err := utils.Run(cmd)
+	ExpectWithOffset(1, err).NotTo(HaveOccurred(), "Failed to build the manager(Operator) image")
+
+	// TODO(user): If you want to change the e2e test vendor from Kind, ensure the image is
+	// built and available before running the tests. Also, remove the following block.
+	By("loading the manager(Operator) image on Kind")
+	err = utils.LoadImageToKindClusterWithName(projectImage)
+	ExpectWithOffset(1, err).NotTo(HaveOccurred(), "Failed to load the manager(Operator) image into Kind")
+
+	// The e2e tests are intended to run on a temporary cluster that is created and destroyed for testing.
+	// To prevent errors when tests run in environments with CertManager already installed,
+	// we check for its presence before execution.
+	// Setup CertManager before the suite if not skipped and if not already installed
+	if !skipCertManagerInstall {
+		By("checking if cert manager is installed already")
+		isCertManagerAlreadyInstalled = utils.IsCertManagerCRDsInstalled()
+		if !isCertManagerAlreadyInstalled {
+			_, _ = fmt.Fprintf(GinkgoWriter, "Installing CertManager...\n")
+			Expect(utils.InstallCertManager()).To(Succeed(), "Failed to install CertManager")
+		} else {
+			_, _ = fmt.Fprintf(GinkgoWriter, "WARNING: CertManager is already installed. Skipping installation...\n")
+		}
+	}
+})
+
+var _ = AfterSuite(func() {
+	// Teardown CertManager after the suite if not skipped and if it was not already installed
+	if !skipCertManagerInstall && !isCertManagerAlreadyInstalled {
+		_, _ = fmt.Fprintf(GinkgoWriter, "Uninstalling CertManager...\n")
+		utils.UninstallCertManager()
+	}
+})
diff --git a/operator/test/e2e/e2e_test.go b/operator/test/e2e/e2e_test.go
new file mode 100644
index 000000000..e2c353d7f
--- /dev/null
+++ b/operator/test/e2e/e2e_test.go
@@ -0,0 +1,329 @@
+/*
+Copyright 2025.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package e2e
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"time"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+
+	"production-stack/test/utils"
+)
+
+// namespace is where the project is deployed
+const namespace = "production-stack-system"
+
+// serviceAccountName created for the project
+const serviceAccountName = "production-stack-controller-manager"
+
+// metricsServiceName is the name of the metrics service of the project
+const metricsServiceName = "production-stack-controller-manager-metrics-service"
+
+// metricsRoleBindingName is the name of the ClusterRoleBinding created to allow access to the metrics data
+const metricsRoleBindingName = "production-stack-metrics-binding"
+
+var _ = Describe("Manager", Ordered, func() {
+	var controllerPodName string
+
+	// Before running the tests, set up the environment by creating the namespace,
+	// enforce the restricted security policy to the namespace, installing CRDs,
+	// and deploying the controller.
+	BeforeAll(func() {
+		By("creating manager namespace")
+		cmd := exec.Command("kubectl", "create", "ns", namespace)
+		_, err := utils.Run(cmd)
+		Expect(err).NotTo(HaveOccurred(), "Failed to create namespace")
+
+		By("labeling the namespace to enforce the restricted security policy")
+		cmd = exec.Command("kubectl", "label", "--overwrite", "ns", namespace,
+			"pod-security.kubernetes.io/enforce=restricted")
+		_, err = utils.Run(cmd)
+		Expect(err).NotTo(HaveOccurred(), "Failed to label namespace with restricted policy")
+
+		By("installing CRDs")
+		cmd = exec.Command("make", "install")
+		_, err = utils.Run(cmd)
+		Expect(err).NotTo(HaveOccurred(), "Failed to install CRDs")
+
+		By("deploying the controller-manager")
+		cmd = exec.Command("make", "deploy", fmt.Sprintf("IMG=%s", projectImage))
+		_, err = utils.Run(cmd)
+		Expect(err).NotTo(HaveOccurred(), "Failed to deploy the controller-manager")
+	})
+
+	// After all tests have been executed, clean up by undeploying the controller, uninstalling CRDs,
+	// and deleting the namespace.
+	AfterAll(func() {
+		By("cleaning up the curl pod for metrics")
+		cmd := exec.Command("kubectl", "delete", "pod", "curl-metrics", "-n", namespace)
+		_, _ = utils.Run(cmd)
+
+		By("undeploying the controller-manager")
+		cmd = exec.Command("make", "undeploy")
+		_, _ = utils.Run(cmd)
+
+		By("uninstalling CRDs")
+		cmd = exec.Command("make", "uninstall")
+		_, _ = utils.Run(cmd)
+
+		By("removing manager namespace")
+		cmd = exec.Command("kubectl", "delete", "ns", namespace)
+		_, _ = utils.Run(cmd)
+	})
+
+	// After each test, check for failures and collect logs, events,
+	// and pod descriptions for debugging.
+	AfterEach(func() {
+		specReport := CurrentSpecReport()
+		if specReport.Failed() {
+			By("Fetching controller manager pod logs")
+			cmd := exec.Command("kubectl", "logs", controllerPodName, "-n", namespace)
+			controllerLogs, err := utils.Run(cmd)
+			if err == nil {
+				_, _ = fmt.Fprintf(GinkgoWriter, "Controller logs:\n %s", controllerLogs)
+			} else {
+				_, _ = fmt.Fprintf(GinkgoWriter, "Failed to get Controller logs: %s", err)
+			}
+
+			By("Fetching Kubernetes events")
+			cmd = exec.Command("kubectl", "get", "events", "-n", namespace, "--sort-by=.lastTimestamp")
+			eventsOutput, err := utils.Run(cmd)
+			if err == nil {
+				_, _ = fmt.Fprintf(GinkgoWriter, "Kubernetes events:\n%s", eventsOutput)
+			} else {
+				_, _ = fmt.Fprintf(GinkgoWriter, "Failed to get Kubernetes events: %s", err)
+			}
+
+			By("Fetching curl-metrics logs")
+			cmd = exec.Command("kubectl", "logs", "curl-metrics", "-n", namespace)
+			metricsOutput, err := utils.Run(cmd)
+			if err == nil {
+				_, _ = fmt.Fprintf(GinkgoWriter, "Metrics logs:\n %s", metricsOutput)
+			} else {
+				_, _ = fmt.Fprintf(GinkgoWriter, "Failed to get curl-metrics logs: %s", err)
+			}
+
+			By("Fetching controller manager pod description")
+			cmd = exec.Command("kubectl", "describe", "pod", controllerPodName, "-n", namespace)
+			podDescription, err := utils.Run(cmd)
+			if err == nil {
+				fmt.Println("Pod description:\n", podDescription)
+			} else {
+				fmt.Println("Failed to describe controller pod")
+			}
+		}
+	})
+
+	SetDefaultEventuallyTimeout(2 * time.Minute)
+	SetDefaultEventuallyPollingInterval(time.Second)
+
+	Context("Manager", func() {
+		It("should run successfully", func() {
+			By("validating that the controller-manager pod is running as expected")
+			verifyControllerUp := func(g Gomega) {
+				// Get the name of the controller-manager pod
+				cmd := exec.Command("kubectl", "get",
+					"pods", "-l", "control-plane=controller-manager",
+					"-o", "go-template={{ range .items }}"+
+						"{{ if not .metadata.deletionTimestamp }}"+
+						"{{ .metadata.name }}"+
+						"{{ \"\\n\" }}{{ end }}{{ end }}",
+					"-n", namespace,
+				)
+
+				podOutput, err := utils.Run(cmd)
+				g.Expect(err).NotTo(HaveOccurred(), "Failed to retrieve controller-manager pod information")
+				podNames := utils.GetNonEmptyLines(podOutput)
+				g.Expect(podNames).To(HaveLen(1), "expected 1 controller pod running")
+				controllerPodName = podNames[0]
+				g.Expect(controllerPodName).To(ContainSubstring("controller-manager"))
+
+				// Validate the pod's status
+				cmd = exec.Command("kubectl", "get",
+					"pods", controllerPodName, "-o", "jsonpath={.status.phase}",
+					"-n", namespace,
+				)
+				output, err := utils.Run(cmd)
+				g.Expect(err).NotTo(HaveOccurred())
+				g.Expect(output).To(Equal("Running"), "Incorrect controller-manager pod status")
+			}
+			Eventually(verifyControllerUp).Should(Succeed())
+		})
+
+		It("should ensure the metrics endpoint is serving metrics", func() {
+			By("creating a ClusterRoleBinding for the service account to allow access to metrics")
+			cmd := exec.Command("kubectl", "create", "clusterrolebinding", metricsRoleBindingName,
+				"--clusterrole=production-stack-metrics-reader",
+				fmt.Sprintf("--serviceaccount=%s:%s", namespace, serviceAccountName),
+			)
+			_, err := utils.Run(cmd)
+			Expect(err).NotTo(HaveOccurred(), "Failed to create ClusterRoleBinding")
+
+			By("validating that the metrics service is available")
+			cmd = exec.Command("kubectl", "get", "service", metricsServiceName, "-n", namespace)
+			_, err = utils.Run(cmd)
+			Expect(err).NotTo(HaveOccurred(), "Metrics service should exist")
+
+			By("getting the service account token")
+			token, err := serviceAccountToken()
+			Expect(err).NotTo(HaveOccurred())
+			Expect(token).NotTo(BeEmpty())
+
+			By("waiting for the metrics endpoint to be ready")
+			verifyMetricsEndpointReady := func(g Gomega) {
+				cmd := exec.Command("kubectl", "get", "endpoints", metricsServiceName, "-n", namespace)
+				output, err := utils.Run(cmd)
+				g.Expect(err).NotTo(HaveOccurred())
+				g.Expect(output).To(ContainSubstring("8443"), "Metrics endpoint is not ready")
+			}
+			Eventually(verifyMetricsEndpointReady).Should(Succeed())
+
+			By("verifying that the controller manager is serving the metrics server")
+			verifyMetricsServerStarted := func(g Gomega) {
+				cmd := exec.Command("kubectl", "logs", controllerPodName, "-n", namespace)
+				output, err := utils.Run(cmd)
+				g.Expect(err).NotTo(HaveOccurred())
+				g.Expect(output).To(ContainSubstring("controller-runtime.metrics\tServing metrics server"),
+					"Metrics server not yet started")
+			}
+			Eventually(verifyMetricsServerStarted).Should(Succeed())
+
+			By("creating the curl-metrics pod to access the metrics endpoint")
+			cmd = exec.Command("kubectl", "run", "curl-metrics", "--restart=Never",
+				"--namespace", namespace,
+				"--image=curlimages/curl:latest",
+				"--overrides",
+				fmt.Sprintf(`{
+					"spec": {
+						"containers": [{
+							"name": "curl",
+							"image": "curlimages/curl:latest",
+							"command": ["/bin/sh", "-c"],
+							"args": ["curl -v -k -H 'Authorization: Bearer %s' https://%s.%s.svc.cluster.local:8443/metrics"],
+							"securityContext": {
+								"allowPrivilegeEscalation": false,
+								"capabilities": {
+									"drop": ["ALL"]
+								},
+								"runAsNonRoot": true,
+								"runAsUser": 1000,
+								"seccompProfile": {
+									"type": "RuntimeDefault"
+								}
+							}
+						}],
+						"serviceAccount": "%s"
+					}
+				}`, token, metricsServiceName, namespace, serviceAccountName))
+			_, err = utils.Run(cmd)
+			Expect(err).NotTo(HaveOccurred(), "Failed to create curl-metrics pod")
+
+			By("waiting for the curl-metrics pod to complete.")
+			verifyCurlUp := func(g Gomega) {
+				cmd := exec.Command("kubectl", "get", "pods", "curl-metrics",
+					"-o", "jsonpath={.status.phase}",
+					"-n", namespace)
+				output, err := utils.Run(cmd)
+				g.Expect(err).NotTo(HaveOccurred())
+				g.Expect(output).To(Equal("Succeeded"), "curl pod in wrong status")
+			}
+			Eventually(verifyCurlUp, 5*time.Minute).Should(Succeed())
+
+			By("getting the metrics by checking curl-metrics logs")
+			metricsOutput := getMetricsOutput()
+			Expect(metricsOutput).To(ContainSubstring(
+				"controller_runtime_reconcile_total",
+			))
+		})
+
+		// +kubebuilder:scaffold:e2e-webhooks-checks
+
+		// TODO: Customize the e2e test suite with scenarios specific to your project.
+		// Consider applying sample/CR(s) and check their status and/or verifying
+		// the reconciliation by using the metrics, i.e.:
+		// metricsOutput := getMetricsOutput()
+		// Expect(metricsOutput).To(ContainSubstring(
+		//    fmt.Sprintf(`controller_runtime_reconcile_total{controller="%s",result="success"} 1`,
+		//    strings.ToLower(<Kind>),
+		// ))
+	})
+})
+
+// serviceAccountToken returns a token for the serviceAccountName service account in namespace.
+// It uses the Kubernetes TokenRequest API to generate a token by directly sending a request
+// and parsing the resulting token from the API response.
+func serviceAccountToken() (string, error) {
+	const tokenRequestRawString = `{
+		"apiVersion": "authentication.k8s.io/v1",
+		"kind": "TokenRequest"
+	}`
+
+	// Temporary file to store the token request
+	secretName := fmt.Sprintf("%s-token-request", serviceAccountName)
+	tokenRequestFile := filepath.Join("/tmp", secretName)
+	err := os.WriteFile(tokenRequestFile, []byte(tokenRequestRawString), os.FileMode(0o644))
+	if err != nil {
+		return "", err
+	}
+
+	var out string
+	verifyTokenCreation := func(g Gomega) {
+		// Execute kubectl command to create the token
+		cmd := exec.Command("kubectl", "create", "--raw", fmt.Sprintf(
+			"/api/v1/namespaces/%s/serviceaccounts/%s/token",
+			namespace,
+			serviceAccountName,
+		), "-f", tokenRequestFile)
+
+		output, err := cmd.CombinedOutput()
+		g.Expect(err).NotTo(HaveOccurred())
+
+		// Parse the JSON output to extract the token
+		var token tokenRequest
+		err = json.Unmarshal(output, &token)
+		g.Expect(err).NotTo(HaveOccurred())
+
+		out = token.Status.Token
+	}
+	Eventually(verifyTokenCreation).Should(Succeed())
+
+	return out, err
+}
+
+// getMetricsOutput retrieves and returns the logs from the curl pod used to access the metrics endpoint.
+func getMetricsOutput() string {
+	By("getting the curl-metrics logs")
+	cmd := exec.Command("kubectl", "logs", "curl-metrics", "-n", namespace)
+	metricsOutput, err := utils.Run(cmd)
+	Expect(err).NotTo(HaveOccurred(), "Failed to retrieve logs from curl pod")
+	Expect(metricsOutput).To(ContainSubstring("< HTTP/1.1 200 OK"))
+	return metricsOutput
+}
+
+// tokenRequest is a simplified representation of the Kubernetes TokenRequest API response,
+// containing only the token field that we need to extract.
+type tokenRequest struct {
+	Status struct {
+		Token string `json:"token"`
+	} `json:"status"`
+}
diff --git a/operator/test/utils/utils.go b/operator/test/utils/utils.go
new file mode 100644
index 000000000..04a5141cc
--- /dev/null
+++ b/operator/test/utils/utils.go
@@ -0,0 +1,251 @@
+/*
+Copyright 2025.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package utils
+
+import (
+	"bufio"
+	"bytes"
+	"fmt"
+	"os"
+	"os/exec"
+	"strings"
+
+	. "github.com/onsi/ginkgo/v2" //nolint:golint,revive
+)
+
+const (
+	prometheusOperatorVersion = "v0.77.1"
+	prometheusOperatorURL     = "https://github.com/prometheus-operator/prometheus-operator/" +
+		"releases/download/%s/bundle.yaml"
+
+	certmanagerVersion = "v1.16.3"
+	certmanagerURLTmpl = "https://github.com/cert-manager/cert-manager/releases/download/%s/cert-manager.yaml"
+)
+
+func warnError(err error) {
+	_, _ = fmt.Fprintf(GinkgoWriter, "warning: %v\n", err)
+}
+
+// Run executes the provided command from the project directory and returns its combined output
+func Run(cmd *exec.Cmd) (string, error) {
+	dir, _ := GetProjectDir()
+	cmd.Dir = dir
+
+	if err := os.Chdir(cmd.Dir); err != nil {
+		_, _ = fmt.Fprintf(GinkgoWriter, "chdir dir: %s\n", err)
+	}
+
+	cmd.Env = append(os.Environ(), "GO111MODULE=on")
+	command := strings.Join(cmd.Args, " ")
+	_, _ = fmt.Fprintf(GinkgoWriter, "running: %s\n", command)
+	output, err := cmd.CombinedOutput()
+	if err != nil {
+		return string(output), fmt.Errorf("%s failed with error: (%v) %s", command, err, string(output))
+	}
+
+	return string(output), nil
+}
+
+// InstallPrometheusOperator installs the prometheus Operator to be used to export the enabled metrics.
+func InstallPrometheusOperator() error {
+	url := fmt.Sprintf(prometheusOperatorURL, prometheusOperatorVersion)
+	cmd := exec.Command("kubectl", "create", "-f", url)
+	_, err := Run(cmd)
+	return err
+}
+
+// UninstallPrometheusOperator uninstalls the Prometheus Operator.
+func UninstallPrometheusOperator() {
+	url := fmt.Sprintf(prometheusOperatorURL, prometheusOperatorVersion)
+	cmd := exec.Command("kubectl", "delete", "-f", url)
+	if _, err := Run(cmd); err != nil {
+		warnError(err)
+	}
+}
+
+// IsPrometheusCRDsInstalled checks if any Prometheus CRDs are installed
+// by verifying the existence of key CRDs related to Prometheus.
+func IsPrometheusCRDsInstalled() bool {
+	// List of common Prometheus CRDs
+	prometheusCRDs := []string{
+		"prometheuses.monitoring.coreos.com",
+		"prometheusrules.monitoring.coreos.com",
+		"prometheusagents.monitoring.coreos.com",
+	}
+
+	cmd := exec.Command("kubectl", "get", "crds", "-o", "custom-columns=NAME:.metadata.name")
+	output, err := Run(cmd)
+	if err != nil {
+		return false
+	}
+	crdList := GetNonEmptyLines(output)
+	for _, crd := range prometheusCRDs {
+		for _, line := range crdList {
+			if strings.Contains(line, crd) {
+				return true
+			}
+		}
+	}
+
+	return false
+}
+
+// UninstallCertManager uninstalls the cert manager
+func UninstallCertManager() {
+	url := fmt.Sprintf(certmanagerURLTmpl, certmanagerVersion)
+	cmd := exec.Command("kubectl", "delete", "-f", url)
+	if _, err := Run(cmd); err != nil {
+		warnError(err)
+	}
+}
+
+// InstallCertManager installs the cert manager bundle.
+func InstallCertManager() error {
+	url := fmt.Sprintf(certmanagerURLTmpl, certmanagerVersion)
+	cmd := exec.Command("kubectl", "apply", "-f", url)
+	if _, err := Run(cmd); err != nil {
+		return err
+	}
+	// Wait for cert-manager-webhook to be ready, which can take time if cert-manager
+	// was re-installed after uninstalling on a cluster.
+	cmd = exec.Command("kubectl", "wait", "deployment.apps/cert-manager-webhook",
+		"--for", "condition=Available",
+		"--namespace", "cert-manager",
+		"--timeout", "5m",
+	)
+
+	_, err := Run(cmd)
+	return err
+}
+
+// IsCertManagerCRDsInstalled checks if any Cert Manager CRDs are installed
+// by verifying the existence of key CRDs related to Cert Manager.
+func IsCertManagerCRDsInstalled() bool {
+	// List of common Cert Manager CRDs
+	certManagerCRDs := []string{
+		"certificates.cert-manager.io",
+		"issuers.cert-manager.io",
+		"clusterissuers.cert-manager.io",
+		"certificaterequests.cert-manager.io",
+		"orders.acme.cert-manager.io",
+		"challenges.acme.cert-manager.io",
+	}
+
+	// Execute the kubectl command to get all CRDs
+	cmd := exec.Command("kubectl", "get", "crds")
+	output, err := Run(cmd)
+	if err != nil {
+		return false
+	}
+
+	// Check if any of the Cert Manager CRDs are present
+	crdList := GetNonEmptyLines(output)
+	for _, crd := range certManagerCRDs {
+		for _, line := range crdList {
+			if strings.Contains(line, crd) {
+				return true
+			}
+		}
+	}
+
+	return false
+}
+
+// LoadImageToKindClusterWithName loads a local docker image to the kind cluster
+func LoadImageToKindClusterWithName(name string) error {
+	cluster := "kind"
+	if v, ok := os.LookupEnv("KIND_CLUSTER"); ok {
+		cluster = v
+	}
+	kindOptions := []string{"load", "docker-image", name, "--name", cluster}
+	cmd := exec.Command("kind", kindOptions...)
+	_, err := Run(cmd)
+	return err
+}
+
+// GetNonEmptyLines splits the given command output string on line breaks
+// and returns only the non-empty lines.
+func GetNonEmptyLines(output string) []string {
+	var res []string
+	elements := strings.Split(output, "\n")
+	for _, element := range elements {
+		if element != "" {
+			res = append(res, element)
+		}
+	}
+
+	return res
+}
+
+// GetProjectDir returns the current working directory with any "/test/e2e" segment removed
+func GetProjectDir() (string, error) {
+	wd, err := os.Getwd()
+	if err != nil {
+		return wd, err
+	}
+	wd = strings.Replace(wd, "/test/e2e", "", -1)
+	return wd, nil
+}
+
+// UncommentCode searches for target in the file and removes the comment prefix
+// of the target content. The target content may span multiple lines.
+func UncommentCode(filename, target, prefix string) error {
+	// false positive
+	// nolint:gosec
+	content, err := os.ReadFile(filename)
+	if err != nil {
+		return err
+	}
+	strContent := string(content)
+
+	idx := strings.Index(strContent, target)
+	if idx < 0 {
+		return fmt.Errorf("unable to find the code %s to be uncomment", target)
+	}
+
+	out := new(bytes.Buffer)
+	_, err = out.Write(content[:idx])
+	if err != nil {
+		return err
+	}
+
+	scanner := bufio.NewScanner(bytes.NewBufferString(target))
+	if !scanner.Scan() {
+		return nil
+	}
+	for {
+		_, err := out.WriteString(strings.TrimPrefix(scanner.Text(), prefix))
+		if err != nil {
+			return err
+		}
+		// Avoid writing a newline in case the previous line was the last in target.
+		if !scanner.Scan() {
+			break
+		}
+		if _, err := out.WriteString("\n"); err != nil {
+			return err
+		}
+	}
+
+	_, err = out.Write(content[idx+len(target):])
+	if err != nil {
+		return err
+	}
+	// false positive
+	// nolint:gosec
+	return os.WriteFile(filename, out.Bytes(), 0644)
+}

From 46713c3f4e2d287fd8c0768b8a5f2ee0a8389aae Mon Sep 17 00:00:00 2001
From: royyhuang <roy.y.huang@gmail.com>
Date: Fri, 9 May 2025 06:03:05 +0000
Subject: [PATCH 05/14] enable lmcache remote cache server offloading

Signed-off-by: royyhuang <roy.y.huang@gmail.com>
---
 operator/PROJECT                              |   9 +
 operator/api/v1alpha1/cacheserver_types.go    |  82 +++++
 operator/api/v1alpha1/common.go               |  32 ++
 operator/api/v1alpha1/vllmruntime_types.go    |  15 -
 .../api/v1alpha1/zz_generated.deepcopy.go     |  92 ++++++
 operator/cmd/main.go                          |   7 +
 ...production-stack.vllm.ai_cacheservers.yaml | 113 +++++++
 operator/config/crd/kustomization.yaml        |   1 +
 .../config/rbac/cacheserver_admin_role.yaml   |  27 ++
 .../config/rbac/cacheserver_editor_role.yaml  |  33 ++
 .../config/rbac/cacheserver_viewer_role.yaml  |  29 ++
 operator/config/rbac/kustomization.yaml       |   3 +
 operator/config/rbac/role.yaml                |   3 +
 operator/config/samples/kustomization.yaml    |   1 +
 ...production-stack_v1alpha1_cacheserver.yaml |  28 ++
 .../production-stack_v1alpha_vllmruntime.yaml |   4 +-
 operator/go.mod                               |   2 +-
 .../controller/cacheserver_controller.go      | 289 ++++++++++++++++++
 .../controller/cacheserver_controller_test.go |  84 +++++
 19 files changed, 836 insertions(+), 18 deletions(-)
 create mode 100644 operator/api/v1alpha1/cacheserver_types.go
 create mode 100644 operator/api/v1alpha1/common.go
 create mode 100644 operator/config/crd/bases/production-stack.vllm.ai_cacheservers.yaml
 create mode 100644 operator/config/rbac/cacheserver_admin_role.yaml
 create mode 100644 operator/config/rbac/cacheserver_editor_role.yaml
 create mode 100644 operator/config/rbac/cacheserver_viewer_role.yaml
 create mode 100644 operator/config/samples/production-stack_v1alpha1_cacheserver.yaml
 create mode 100644 operator/internal/controller/cacheserver_controller.go
 create mode 100644 operator/internal/controller/cacheserver_controller_test.go

diff --git a/operator/PROJECT b/operator/PROJECT
index 696ec22c5..f59091172 100644
--- a/operator/PROJECT
+++ b/operator/PROJECT
@@ -26,4 +26,13 @@ resources:
   kind: VLLMRouter
   path: production-stack/api/v1alpha1
   version: v1alpha1
+- api:
+    crdVersion: v1
+    namespaced: true
+  controller: true
+  domain: vllm.ai
+  group: production-stack
+  kind: CacheServer
+  path: production-stack/api/v1alpha1
+  version: v1alpha1
 version: "3"
diff --git a/operator/api/v1alpha1/cacheserver_types.go b/operator/api/v1alpha1/cacheserver_types.go
new file mode 100644
index 000000000..08c44f139
--- /dev/null
+++ b/operator/api/v1alpha1/cacheserver_types.go
@@ -0,0 +1,82 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1alpha1
+
+import (
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// EDIT THIS FILE!  THIS IS SCAFFOLDING FOR YOU TO OWN!
+// NOTE: json tags are required.  Any new fields you add must have json tags for the fields to be serialized.
+
+// CacheServerSpec defines the desired state of CacheServer
+type CacheServerSpec struct {
+	// Image configuration for the cache server
+	Image ImageSpec `json:"image"`
+
+	// Container port for the cache server
+	// +kubebuilder:default=8000
+	Port int32 `json:"port"`
+
+	// Resource requirements
+	Resources ResourceRequirements `json:"resources"`
+
+	// Number of replicas
+	// +kubebuilder:default=1
+	Replicas int32 `json:"replicas"`
+
+	// Deployment strategy
+	// +kubebuilder:validation:Enum=RollingUpdate;Recreate
+	// +kubebuilder:default=RollingUpdate
+	DeploymentStrategy string `json:"deploymentStrategy"`
+}
+
+// CacheServerStatus defines the observed state of CacheServer
+type CacheServerStatus struct {
+	// Last time the status was updated
+	LastUpdated metav1.Time `json:"lastUpdated,omitempty"`
+
+	// Current status of the cache server
+	Status string `json:"status,omitempty"`
+}
+
+// +kubebuilder:object:root=true
+// +kubebuilder:subresource:status
+// +kubebuilder:printcolumn:name="Status",type="string",JSONPath=".status.status"
+// +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp"
+
+// CacheServer is the Schema for the cacheservers API
+type CacheServer struct {
+	metav1.TypeMeta   `json:",inline"`
+	metav1.ObjectMeta `json:"metadata,omitempty"`
+
+	Spec   CacheServerSpec   `json:"spec,omitempty"`
+	Status CacheServerStatus `json:"status,omitempty"`
+}
+
+// +kubebuilder:object:root=true
+
+// CacheServerList contains a list of CacheServer
+type CacheServerList struct {
+	metav1.TypeMeta `json:",inline"`
+	metav1.ListMeta `json:"metadata,omitempty"`
+	Items           []CacheServer `json:"items"`
+}
+
+func init() {
+	SchemeBuilder.Register(&CacheServer{}, &CacheServerList{})
+}
diff --git a/operator/api/v1alpha1/common.go b/operator/api/v1alpha1/common.go
new file mode 100644
index 000000000..065d9c689
--- /dev/null
+++ b/operator/api/v1alpha1/common.go
@@ -0,0 +1,32 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1alpha1
+
+// ResourceRequirements defines the resource requirements
+type ResourceRequirements struct {
+	CPU    string `json:"cpu,omitempty"`    // Kubernetes quantity string, e.g. "2"
+	Memory string `json:"memory,omitempty"` // Kubernetes quantity string, e.g. "16Gi"
+	GPU    string `json:"gpu,omitempty"`    // NOTE(review): not read by the CacheServer controller — confirm intended
+}
+
+// ImageSpec defines the container image configuration
+type ImageSpec struct {
+	Registry       string `json:"registry"`                 // registry host, e.g. "docker.io"
+	Name           string `json:"name"`                     // repository and tag; the CacheServer controller uses "<Registry>/<Name>" as the image
+	PullPolicy     string `json:"pullPolicy,omitempty"`     // e.g. "IfNotPresent", which the CacheServer controller also assumes when empty
+	PullSecretName string `json:"pullSecretName,omitempty"` // presumably the name of an image pull Secret — TODO confirm
+}
diff --git a/operator/api/v1alpha1/vllmruntime_types.go b/operator/api/v1alpha1/vllmruntime_types.go
index 117a77463..1ce19c2b7 100644
--- a/operator/api/v1alpha1/vllmruntime_types.go
+++ b/operator/api/v1alpha1/vllmruntime_types.go
@@ -133,21 +133,6 @@ type EnvVar struct {
 	Value string `json:"value"`
 }
 
-// ResourceRequirements defines the resource requirements
-type ResourceRequirements struct {
-	CPU    string `json:"cpu,omitempty"`
-	Memory string `json:"memory,omitempty"`
-	GPU    string `json:"gpu,omitempty"`
-}
-
-// ImageSpec defines the container image configuration
-type ImageSpec struct {
-	Registry       string `json:"registry"`
-	Name           string `json:"name"`
-	PullPolicy     string `json:"pullPolicy,omitempty"`
-	PullSecretName string `json:"pullSecretName,omitempty"`
-}
-
 // VLLMRuntimeStatus defines the observed state of VLLMRuntime
 type VLLMRuntimeStatus struct {
 	// Model status
diff --git a/operator/api/v1alpha1/zz_generated.deepcopy.go b/operator/api/v1alpha1/zz_generated.deepcopy.go
index 51e91ba0a..21bbc16e8 100644
--- a/operator/api/v1alpha1/zz_generated.deepcopy.go
+++ b/operator/api/v1alpha1/zz_generated.deepcopy.go
@@ -25,6 +25,98 @@ import (
 	runtime "k8s.io/apimachinery/pkg/runtime"
 )
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *CacheServer) DeepCopyInto(out *CacheServer) {
+	*out = *in
+	out.TypeMeta = in.TypeMeta
+	in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
+	out.Spec = in.Spec
+	in.Status.DeepCopyInto(&out.Status)
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CacheServer.
+func (in *CacheServer) DeepCopy() *CacheServer {
+	if in == nil {
+		return nil
+	}
+	out := new(CacheServer)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *CacheServer) DeepCopyObject() runtime.Object {
+	if c := in.DeepCopy(); c != nil {
+		return c
+	}
+	return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *CacheServerList) DeepCopyInto(out *CacheServerList) {
+	*out = *in
+	out.TypeMeta = in.TypeMeta
+	in.ListMeta.DeepCopyInto(&out.ListMeta)
+	if in.Items != nil {
+		in, out := &in.Items, &out.Items
+		*out = make([]CacheServer, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CacheServerList.
+func (in *CacheServerList) DeepCopy() *CacheServerList {
+	if in == nil {
+		return nil
+	}
+	out := new(CacheServerList)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *CacheServerList) DeepCopyObject() runtime.Object {
+	if c := in.DeepCopy(); c != nil {
+		return c
+	}
+	return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *CacheServerSpec) DeepCopyInto(out *CacheServerSpec) {
+	*out = *in
+	out.Image = in.Image
+	out.Resources = in.Resources
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CacheServerSpec.
+func (in *CacheServerSpec) DeepCopy() *CacheServerSpec {
+	if in == nil {
+		return nil
+	}
+	out := new(CacheServerSpec)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *CacheServerStatus) DeepCopyInto(out *CacheServerStatus) {
+	*out = *in
+	in.LastUpdated.DeepCopyInto(&out.LastUpdated)
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CacheServerStatus.
+func (in *CacheServerStatus) DeepCopy() *CacheServerStatus {
+	if in == nil {
+		return nil
+	}
+	out := new(CacheServerStatus)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *EnvVar) DeepCopyInto(out *EnvVar) {
 	*out = *in
diff --git a/operator/cmd/main.go b/operator/cmd/main.go
index 1a32cd700..08c7e291b 100644
--- a/operator/cmd/main.go
+++ b/operator/cmd/main.go
@@ -217,6 +217,13 @@ func main() {
 		setupLog.Error(err, "unable to create controller", "controller", "VLLMRuntime")
 		os.Exit(1)
 	}
+	if err = (&controller.CacheServerReconciler{
+		Client: mgr.GetClient(),
+		Scheme: mgr.GetScheme(),
+	}).SetupWithManager(mgr); err != nil {
+		setupLog.Error(err, "unable to create controller", "controller", "CacheServer")
+		os.Exit(1)
+	}
 	// +kubebuilder:scaffold:builder
 
 	if metricsCertWatcher != nil {
diff --git a/operator/config/crd/bases/production-stack.vllm.ai_cacheservers.yaml b/operator/config/crd/bases/production-stack.vllm.ai_cacheservers.yaml
new file mode 100644
index 000000000..77f44149e
--- /dev/null
+++ b/operator/config/crd/bases/production-stack.vllm.ai_cacheservers.yaml
@@ -0,0 +1,113 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.17.2
+  name: cacheservers.production-stack.vllm.ai
+spec:
+  group: production-stack.vllm.ai
+  names:
+    kind: CacheServer
+    listKind: CacheServerList
+    plural: cacheservers
+    singular: cacheserver
+  scope: Namespaced
+  versions:
+  - additionalPrinterColumns:
+    - jsonPath: .status.status
+      name: Status
+      type: string
+    - jsonPath: .metadata.creationTimestamp
+      name: Age
+      type: date
+    name: v1alpha1
+    schema:
+      openAPIV3Schema:
+        description: CacheServer is the Schema for the cacheservers API
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: CacheServerSpec defines the desired state of CacheServer
+            properties:
+              deploymentStrategy:
+                default: RollingUpdate
+                description: Deployment strategy
+                enum:
+                - RollingUpdate
+                - Recreate
+                type: string
+              image:
+                description: Image configuration for the cache server
+                properties:
+                  name:
+                    type: string
+                  pullPolicy:
+                    type: string
+                  pullSecretName:
+                    type: string
+                  registry:
+                    type: string
+                required:
+                - name
+                - registry
+                type: object
+              port:
+                default: 8000
+                description: Container port for the cache server
+                format: int32
+                type: integer
+              replicas:
+                default: 1
+                description: Number of replicas
+                format: int32
+                type: integer
+              resources:
+                description: Resource requirements
+                properties:
+                  cpu:
+                    type: string
+                  gpu:
+                    type: string
+                  memory:
+                    type: string
+                type: object
+            required:
+            - deploymentStrategy
+            - image
+            - port
+            - replicas
+            - resources
+            type: object
+          status:
+            description: CacheServerStatus defines the observed state of CacheServer
+            properties:
+              lastUpdated:
+                description: Last time the status was updated
+                format: date-time
+                type: string
+              status:
+                description: Current status of the cache server
+                type: string
+            type: object
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}
diff --git a/operator/config/crd/kustomization.yaml b/operator/config/crd/kustomization.yaml
index b3158c4de..560dffa30 100644
--- a/operator/config/crd/kustomization.yaml
+++ b/operator/config/crd/kustomization.yaml
@@ -4,6 +4,7 @@
 resources:
 - bases/production-stack.vllm.ai_vllmruntimes.yaml
 - bases/production-stack.vllm.ai_vllmrouters.yaml
+- bases/production-stack.vllm.ai_cacheservers.yaml
 # +kubebuilder:scaffold:crdkustomizeresource
 
 patches:
diff --git a/operator/config/rbac/cacheserver_admin_role.yaml b/operator/config/rbac/cacheserver_admin_role.yaml
new file mode 100644
index 000000000..fd462e309
--- /dev/null
+++ b/operator/config/rbac/cacheserver_admin_role.yaml
@@ -0,0 +1,27 @@
+# This rule is not used by the project production-stack itself.
+# It is provided to allow the cluster admin to help manage permissions for users.
+#
+# Grants full permissions ('*') over production-stack.vllm.ai.
+# This role is intended for users authorized to modify roles and bindings within the cluster,
+# enabling them to delegate specific permissions to other users or groups as needed.
+
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/name: production-stack
+    app.kubernetes.io/managed-by: kustomize
+  name: cacheserver-admin-role
+rules:
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - cacheservers
+  verbs:
+  - '*'
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - cacheservers/status
+  verbs:
+  - get
diff --git a/operator/config/rbac/cacheserver_editor_role.yaml b/operator/config/rbac/cacheserver_editor_role.yaml
new file mode 100644
index 000000000..2291c7c60
--- /dev/null
+++ b/operator/config/rbac/cacheserver_editor_role.yaml
@@ -0,0 +1,33 @@
+# This rule is not used by the project production-stack itself.
+# It is provided to allow the cluster admin to help manage permissions for users.
+#
+# Grants permissions to create, update, and delete resources within the production-stack.vllm.ai.
+# This role is intended for users who need to manage these resources
+# but should not control RBAC or manage permissions for others.
+
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/name: production-stack
+    app.kubernetes.io/managed-by: kustomize
+  name: cacheserver-editor-role
+rules:
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - cacheservers
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - cacheservers/status
+  verbs:
+  - get
diff --git a/operator/config/rbac/cacheserver_viewer_role.yaml b/operator/config/rbac/cacheserver_viewer_role.yaml
new file mode 100644
index 000000000..877a05a4c
--- /dev/null
+++ b/operator/config/rbac/cacheserver_viewer_role.yaml
@@ -0,0 +1,29 @@
+# This rule is not used by the project production-stack itself.
+# It is provided to allow the cluster admin to help manage permissions for users.
+#
+# Grants read-only access to production-stack.vllm.ai resources.
+# This role is intended for users who need visibility into these resources
+# without permissions to modify them. It is ideal for monitoring purposes and limited-access viewing.
+
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/name: production-stack
+    app.kubernetes.io/managed-by: kustomize
+  name: cacheserver-viewer-role
+rules:
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - cacheservers
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - cacheservers/status
+  verbs:
+  - get
diff --git a/operator/config/rbac/kustomization.yaml b/operator/config/rbac/kustomization.yaml
index 7d46d2f99..e47dad1b5 100644
--- a/operator/config/rbac/kustomization.yaml
+++ b/operator/config/rbac/kustomization.yaml
@@ -22,6 +22,9 @@ resources:
 # default, aiding admins in cluster management. Those roles are
 # not used by the {{ .ProjectName }} itself. You can comment the following lines
 # if you do not want those helpers be installed with your Project.
+- cacheserver_admin_role.yaml
+- cacheserver_editor_role.yaml
+- cacheserver_viewer_role.yaml
 - router_admin_role.yaml
 - router_editor_role.yaml
 - router_viewer_role.yaml
diff --git a/operator/config/rbac/role.yaml b/operator/config/rbac/role.yaml
index 941b757ec..10a45fe92 100644
--- a/operator/config/rbac/role.yaml
+++ b/operator/config/rbac/role.yaml
@@ -33,6 +33,7 @@ rules:
 - apiGroups:
   - production-stack.vllm.ai
   resources:
+  - cacheservers
   - vllmrouters
   - vllmruntimes
   verbs:
@@ -46,6 +47,7 @@ rules:
 - apiGroups:
   - production-stack.vllm.ai
   resources:
+  - cacheservers/finalizers
   - vllmrouters/finalizers
   - vllmruntimes/finalizers
   verbs:
@@ -53,6 +55,7 @@ rules:
 - apiGroups:
   - production-stack.vllm.ai
   resources:
+  - cacheservers/status
   - vllmrouters/status
   - vllmruntimes/status
   verbs:
diff --git a/operator/config/samples/kustomization.yaml b/operator/config/samples/kustomization.yaml
index 3cd7d3881..21e1d6999 100644
--- a/operator/config/samples/kustomization.yaml
+++ b/operator/config/samples/kustomization.yaml
@@ -5,4 +5,5 @@ kind: Kustomization
 resources:
 - production-stack_v1alpha1_vllmruntime.yaml
 - production-stack_v1alpha1_vllmrouter.yaml
+- production-stack_v1alpha1_cacheserver.yaml
 # +kubebuilder:scaffold:manifestskustomizesamples
diff --git a/operator/config/samples/production-stack_v1alpha1_cacheserver.yaml b/operator/config/samples/production-stack_v1alpha1_cacheserver.yaml
new file mode 100644
index 000000000..2304c8646
--- /dev/null
+++ b/operator/config/samples/production-stack_v1alpha1_cacheserver.yaml
@@ -0,0 +1,28 @@
+apiVersion: production-stack.vllm.ai/v1alpha1
+kind: CacheServer
+metadata:
+  labels:
+    app.kubernetes.io/name: production-stack
+    app.kubernetes.io/managed-by: kustomize
+  name: cacheserver-sample
+spec:
+  # Image configuration
+  image:
+    registry: "docker.io"
+    name: "lmcache/vllm-openai:2025-04-18"
+    pullPolicy: "IfNotPresent"
+    pullSecretName: ""
+
+  # Container port
+  port: 8000
+
+  # Resource requirements
+  resources:
+    cpu: "2"
+    memory: "16Gi"
+
+  # Number of replicas
+  replicas: 1
+
+  # Deployment strategy
+  deploymentStrategy: "Recreate"
diff --git a/operator/config/samples/production-stack_v1alpha_vllmruntime.yaml b/operator/config/samples/production-stack_v1alpha_vllmruntime.yaml
index 0e13adbdc..7b8947a4b 100644
--- a/operator/config/samples/production-stack_v1alpha_vllmruntime.yaml
+++ b/operator/config/samples/production-stack_v1alpha_vllmruntime.yaml
@@ -21,8 +21,8 @@ spec:
     enabled: true
     cpuOffloadingBufferSize: "15"
     diskOffloadingBufferSize: "8"
-    remoteUrl: ""
-    remoteSerde: ""
+    # remoteUrl: "lm://cacheserver-sample.default.svc.cluster.local:80"
+    # remoteSerde: "naive"
 
   # Model configuration
   model:
diff --git a/operator/go.mod b/operator/go.mod
index 34589b9a0..b158ae91f 100644
--- a/operator/go.mod
+++ b/operator/go.mod
@@ -7,10 +7,10 @@ toolchain go1.24.2
 require (
 	github.com/onsi/ginkgo/v2 v2.23.4
 	github.com/onsi/gomega v1.37.0
+	k8s.io/api v0.33.0
 	k8s.io/apimachinery v0.33.0
 	k8s.io/client-go v0.33.0
 	sigs.k8s.io/controller-runtime v0.20.4
-	k8s.io/api v0.33.0
 )
 
 require (
diff --git a/operator/internal/controller/cacheserver_controller.go b/operator/internal/controller/cacheserver_controller.go
new file mode 100644
index 000000000..1a534dd27
--- /dev/null
+++ b/operator/internal/controller/cacheserver_controller.go
@@ -0,0 +1,289 @@
+/*
+Copyright 2025.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package controller
+
+import (
+	"context"
+	"fmt"
+	"reflect"
+
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/api/resource"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/apimachinery/pkg/util/intstr"
+	"k8s.io/client-go/util/retry"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/log"
+
+	productionstackv1alpha1 "production-stack/api/v1alpha1"
+)
+
+// CacheServerReconciler reconciles a CacheServer object
+type CacheServerReconciler struct {
+	client.Client
+	Scheme *runtime.Scheme
+}
+
+// +kubebuilder:rbac:groups=production-stack.vllm.ai,resources=cacheservers,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=production-stack.vllm.ai,resources=cacheservers/status,verbs=get;update;patch
+// +kubebuilder:rbac:groups=production-stack.vllm.ai,resources=cacheservers/finalizers,verbs=update
+// +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=core,resources=services,verbs=get;list;watch;create;update;patch;delete
+
+// Reconcile drives a CacheServer towards its desired state. Each pass ensures the backing Service
+// and Deployment exist (creating at most one object, then requeueing), pushes spec drift into the
+// Deployment, and finally mirrors the Deployment's availability into the CacheServer status.
+func (r *CacheServerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+	log := log.FromContext(ctx)
+
+	// Fetch the CacheServer instance
+	cacheServer := &productionstackv1alpha1.CacheServer{}
+	if err := r.Get(ctx, req.NamespacedName, cacheServer); err != nil {
+		if errors.IsNotFound(err) {
+			// Request object not found, could have been deleted after reconcile request.
+			// Return and don't requeue
+			log.Info("CacheServer resource not found. Ignoring since object must be deleted")
+			return ctrl.Result{}, nil
+		}
+		// Error reading the object - requeue the request.
+		log.Error(err, "Failed to get CacheServer")
+		return ctrl.Result{}, err
+	}
+
+	// The Service and the Deployment both reuse the CacheServer's name and namespace.
+	childKey := types.NamespacedName{Name: cacheServer.Name, Namespace: cacheServer.Namespace}
+
+	// Check if the service already exists, if not create a new one
+	foundService := &corev1.Service{}
+	switch err := r.Get(ctx, childKey, foundService); {
+	case errors.IsNotFound(err):
+		// Define a new service
+		svc := r.serviceForCacheServer(cacheServer)
+		log.Info("Creating a new Service", "Service.Namespace", svc.Namespace, "Service.Name", svc.Name)
+		if err := r.Create(ctx, svc); err != nil {
+			log.Error(err, "Failed to create new Service", "Service.Namespace", svc.Namespace, "Service.Name", svc.Name)
+			return ctrl.Result{}, err
+		}
+		// Service created successfully - return and requeue
+		return ctrl.Result{Requeue: true}, nil
+	case err != nil:
+		log.Error(err, "Failed to get Service")
+		return ctrl.Result{}, err
+	}
+
+	// Check if the deployment already exists, if not create a new one
+	found := &appsv1.Deployment{}
+	switch err := r.Get(ctx, childKey, found); {
+	case errors.IsNotFound(err):
+		// Define a new deployment
+		dep := r.deploymentForCacheServer(cacheServer)
+		log.Info("Creating a new Deployment", "Deployment.Namespace", dep.Namespace, "Deployment.Name", dep.Name)
+		if err := r.Create(ctx, dep); err != nil {
+			log.Error(err, "Failed to create new Deployment", "Deployment.Namespace", dep.Namespace, "Deployment.Name", dep.Name)
+			return ctrl.Result{}, err
+		}
+		// Deployment created successfully - return and requeue
+		return ctrl.Result{Requeue: true}, nil
+	case err != nil:
+		log.Error(err, "Failed to get Deployment")
+		return ctrl.Result{}, err
+	}
+
+	// Update the deployment if needed
+	if r.deploymentNeedsUpdate(found, cacheServer) {
+		log.Info("Updating Deployment", "Deployment.Namespace", found.Namespace, "Deployment.Name", found.Name)
+		// Apply the desired spec to the object just read instead of sending a freshly built one: the
+		// update then carries the live resourceVersion and keeps metadata other actors have set.
+		found.Spec = r.deploymentForCacheServer(cacheServer).Spec
+		if err := r.Update(ctx, found); err != nil {
+			log.Error(err, "Failed to update Deployment", "Deployment.Namespace", found.Namespace, "Deployment.Name", found.Name)
+			return ctrl.Result{}, err
+		}
+		// Deployment updated successfully - return and requeue
+		return ctrl.Result{Requeue: true}, nil
+	}
+
+	// Update the status
+	if err := r.updateStatus(ctx, cacheServer, found); err != nil {
+		log.Error(err, "Failed to update CacheServer status")
+		return ctrl.Result{}, err
+	}
+
+	return ctrl.Result{}, nil
+}
+
+// deploymentForCacheServer returns a CacheServer Deployment object
+func (r *CacheServerReconciler) deploymentForCacheServer(cacheServer *productionstackv1alpha1.CacheServer) *appsv1.Deployment {
+	labels := map[string]string{
+		"app": cacheServer.Name,
+	}
+
+	// Build resource requirements
+	resources := corev1.ResourceRequirements{
+		Requests: corev1.ResourceList{},
+		Limits:   corev1.ResourceList{},
+	}
+
+	if cacheServer.Spec.Resources.CPU != "" {
+		resources.Requests[corev1.ResourceCPU] = resource.MustParse(cacheServer.Spec.Resources.CPU)
+		resources.Limits[corev1.ResourceCPU] = resource.MustParse(cacheServer.Spec.Resources.CPU)
+	}
+
+	if cacheServer.Spec.Resources.Memory != "" {
+		resources.Requests[corev1.ResourceMemory] = resource.MustParse(cacheServer.Spec.Resources.Memory)
+		resources.Limits[corev1.ResourceMemory] = resource.MustParse(cacheServer.Spec.Resources.Memory)
+	}
+
+	// Get the image from Image spec
+	image := cacheServer.Spec.Image.Registry + "/" + cacheServer.Spec.Image.Name
+
+	// Get the image pull policy
+	imagePullPolicy := corev1.PullIfNotPresent
+	if cacheServer.Spec.Image.PullPolicy != "" {
+		imagePullPolicy = corev1.PullPolicy(cacheServer.Spec.Image.PullPolicy)
+	}
+
+	dep := &appsv1.Deployment{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      cacheServer.Name,
+			Namespace: cacheServer.Namespace,
+		},
+		Spec: appsv1.DeploymentSpec{
+			Replicas: &cacheServer.Spec.Replicas,
+			Selector: &metav1.LabelSelector{
+				MatchLabels: labels,
+			},
+			Template: corev1.PodTemplateSpec{
+				ObjectMeta: metav1.ObjectMeta{
+					Labels: labels,
+				},
+				Spec: corev1.PodSpec{
+					Containers: []corev1.Container{
+						{
+							Name:            "cache-server",
+							Image:           image,
+							ImagePullPolicy: imagePullPolicy,
+							Command: []string{
+								"lmcache_experimental_server",
+								"0.0.0.0",
+								fmt.Sprintf("%d", cacheServer.Spec.Port)},
+							Ports: []corev1.ContainerPort{
+								{
+									Name:          "http",
+									ContainerPort: cacheServer.Spec.Port,
+								},
+							},
+							Resources: resources,
+						},
+					},
+				},
+			},
+		},
+	}
+
+	// Set the owner reference
+	ctrl.SetControllerReference(cacheServer, dep, r.Scheme)
+	return dep
+}
+
+// deploymentNeedsUpdate reports whether replicas, image or resources drifted (spec.port changes are not detected here).
+func (r *CacheServerReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, cs *productionstackv1alpha1.CacheServer) bool {
+	// Flatten to milli-units: reflect.DeepEqual on raw lists reports nil vs. empty and "1" vs. "1000m" as drift.
+	flatten := func(rr corev1.ResourceRequirements) map[string]int64 {
+		flat := map[string]int64{}
+		for kind, list := range map[string]corev1.ResourceList{"requests/": rr.Requests, "limits/": rr.Limits} {
+			for name, qty := range list {
+				flat[kind+string(name)] = qty.MilliValue()
+			}
+		}
+		return flat
+	}
+	live, want := dep.Spec.Template.Spec.Containers, r.deploymentForCacheServer(cs).Spec.Template.Spec.Containers[0]
+	return dep.Spec.Replicas == nil || *dep.Spec.Replicas != cs.Spec.Replicas || len(live) == 0 ||
+		want.Image != live[0].Image || !reflect.DeepEqual(flatten(want.Resources), flatten(live[0].Resources))
+}
+
+// updateStatus mirrors the Deployment's availability into CacheServer.Status, retrying on write conflicts.
+// The status is written only when the phase changes: refreshing LastUpdated on every pass would make each
+// reconcile emit a CacheServer update event, which in turn triggers the next reconcile, without end.
+func (r *CacheServerReconciler) updateStatus(ctx context.Context, cs *productionstackv1alpha1.CacheServer, dep *appsv1.Deployment) error {
+	phase := "NotReady"
+	switch {
+	case dep.Status.AvailableReplicas > 0:
+		phase = "Ready"
+	case dep.Status.UpdatedReplicas > 0:
+		phase = "Updating"
+	}
+	return retry.RetryOnConflict(retry.DefaultRetry, func() error {
+		// Get the latest version of the CacheServer
+		latestCS := &productionstackv1alpha1.CacheServer{}
+		if err := r.Get(ctx, types.NamespacedName{Name: cs.Name, Namespace: cs.Namespace}, latestCS); err != nil {
+			return err
+		}
+		// Nothing changed: skip the write so no new watch event is produced.
+		if latestCS.Status.Status == phase {
+			return nil
+		}
+		latestCS.Status.Status = phase
+		latestCS.Status.LastUpdated = metav1.Now()
+		return r.Status().Update(ctx, latestCS)
+	})
+}
+
+// serviceForCacheServer builds the ClusterIP Service that fronts the cache server pods.
+// It selects pods labelled app=<name> and forwards Service port 80 to spec.port on the container.
+func (r *CacheServerReconciler) serviceForCacheServer(cacheServer *productionstackv1alpha1.CacheServer) *corev1.Service {
+	svc := &corev1.Service{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      cacheServer.Name,
+			Namespace: cacheServer.Namespace,
+		},
+		Spec: corev1.ServiceSpec{
+			Type:     corev1.ServiceTypeClusterIP,
+			Selector: map[string]string{"app": cacheServer.Name},
+			Ports: []corev1.ServicePort{
+				{
+					Name:       "http",
+					Port:       80,
+					TargetPort: intstr.FromInt32(cacheServer.Spec.Port),
+					Protocol:   corev1.ProtocolTCP,
+				},
+			},
+		},
+	}
+
+	// Set the owner reference so the Service is tied to its CacheServer. A failure is logged rather
+	// than silently dropped; the signature has no error return to propagate it through.
+	if err := ctrl.SetControllerReference(cacheServer, svc, r.Scheme); err != nil {
+		log.Log.Error(err, "Failed to set owner reference on Service", "Service.Name", svc.Name)
+	}
+	return svc
+}
+
+// SetupWithManager sets up the controller with the Manager.
+func (r *CacheServerReconciler) SetupWithManager(mgr ctrl.Manager) error {
+	return ctrl.NewControllerManagedBy(mgr).
+		For(&productionstackv1alpha1.CacheServer{}).
+		Owns(&appsv1.Deployment{}).
+		Owns(&corev1.Service{}).
+		Complete(r)
+}
diff --git a/operator/internal/controller/cacheserver_controller_test.go b/operator/internal/controller/cacheserver_controller_test.go
new file mode 100644
index 000000000..681a04d0d
--- /dev/null
+++ b/operator/internal/controller/cacheserver_controller_test.go
@@ -0,0 +1,84 @@
+/*
+Copyright 2025.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package controller
+
+import (
+	"context"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+	"k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/types"
+	"sigs.k8s.io/controller-runtime/pkg/reconcile"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+	productionstackv1alpha1 "production-stack/api/v1alpha1"
+)
+
+var _ = Describe("CacheServer Controller", func() {
+	Context("When reconciling a resource", func() {
+		const resourceName = "test-resource"
+
+		ctx := context.Background()
+
+		typeNamespacedName := types.NamespacedName{
+			Name:      resourceName,
+			Namespace: "default", // TODO(user): Modify as needed
+		}
+		cacheserver := &productionstackv1alpha1.CacheServer{}
+
+		BeforeEach(func() {
+			By("creating the custom resource for the Kind CacheServer")
+			err := k8sClient.Get(ctx, typeNamespacedName, cacheserver)
+			if err != nil && errors.IsNotFound(err) {
+				resource := &productionstackv1alpha1.CacheServer{
+					ObjectMeta: metav1.ObjectMeta{
+						Name:      resourceName,
+						Namespace: "default",
+					},
+					// Spec is left at its zero value. TODO(user): Specify other spec details if needed.
+				}
+				Expect(k8sClient.Create(ctx, resource)).To(Succeed())
+			}
+		})
+
+		AfterEach(func() {
+			// Look up the CacheServer by name and delete it. TODO(user): Add any further cleanup logic needed after each test.
+			resource := &productionstackv1alpha1.CacheServer{}
+			err := k8sClient.Get(ctx, typeNamespacedName, resource)
+			Expect(err).NotTo(HaveOccurred())
+
+			By("Cleanup the specific resource instance CacheServer")
+			Expect(k8sClient.Delete(ctx, resource)).To(Succeed())
+		})
+		It("should successfully reconcile the resource", func() {
+			By("Reconciling the created resource")
+			controllerReconciler := &CacheServerReconciler{
+				Client: k8sClient,
+				Scheme: k8sClient.Scheme(),
+			}
+
+			_, err := controllerReconciler.Reconcile(ctx, reconcile.Request{
+				NamespacedName: typeNamespacedName,
+			})
+			Expect(err).NotTo(HaveOccurred())
+			// Only the absence of a reconcile error is asserted here.
+			// TODO(user): Add more specific assertions, e.g. verify an expected status condition after reconciliation.
+		})
+	})
+})

From 397deac7409cf1b55f8b3dfe355f54e077222dd7 Mon Sep 17 00:00:00 2001
From: royyhuang <roy.y.huang@gmail.com>
Date: Fri, 9 May 2025 07:14:45 +0000
Subject: [PATCH 06/14] fix service discovery issue by adding readiness probe
 to vllm pod

Signed-off-by: royyhuang <roy.y.huang@gmail.com>
---
 operator/api/v1alpha1/vllmrouter_types.go     |  4 +++
 .../production-stack.vllm.ai_vllmrouters.yaml |  4 +++
 .../production-stack_v1alpha_vllmrouter.yaml  |  3 ++
 .../production-stack_v1alpha_vllmruntime.yaml |  4 +--
 .../controller/cacheserver_controller.go      |  4 +--
 .../controller/vllmrouter_controller.go       |  5 ++-
 .../controller/vllmruntime_controller.go      | 32 +++++++++++++++++--
 7 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/operator/api/v1alpha1/vllmrouter_types.go b/operator/api/v1alpha1/vllmrouter_types.go
index 446ee10b6..c0a81ff71 100644
--- a/operator/api/v1alpha1/vllmrouter_types.go
+++ b/operator/api/v1alpha1/vllmrouter_types.go
@@ -39,6 +39,10 @@ type VLLMRouterSpec struct {
 	// +kubebuilder:default=k8s
 	ServiceDiscovery string `json:"serviceDiscovery,omitempty"`
 
+	// K8sLabelSelector specifies the label selector for vLLM runtime pods when using k8s service discovery
+	// +kubebuilder:validation:RequiredWhen=ServiceDiscovery=k8s
+	K8sLabelSelector string `json:"k8sLabelSelector,omitempty"`
+
 	// StaticBackends is required when using static service discovery
 	// +kubebuilder:validation:RequiredWhen=ServiceDiscovery=static
 	StaticBackends string `json:"staticBackends,omitempty"`
diff --git a/operator/config/crd/bases/production-stack.vllm.ai_vllmrouters.yaml b/operator/config/crd/bases/production-stack.vllm.ai_vllmrouters.yaml
index 8668380c3..c6de29def 100644
--- a/operator/config/crd/bases/production-stack.vllm.ai_vllmrouters.yaml
+++ b/operator/config/crd/bases/production-stack.vllm.ai_vllmrouters.yaml
@@ -81,6 +81,10 @@ spec:
                 - name
                 - registry
                 type: object
+              k8sLabelSelector:
+                description: K8sLabelSelector specifies the label selector for vLLM
+                  runtime pods when using k8s service discovery
+                type: string
               nodeSelectorTerms:
                 description: NodeSelectorTerms for pod scheduling
                 items:
diff --git a/operator/config/samples/production-stack_v1alpha_vllmrouter.yaml b/operator/config/samples/production-stack_v1alpha_vllmrouter.yaml
index c774472dd..80995f0c9 100644
--- a/operator/config/samples/production-stack_v1alpha_vllmrouter.yaml
+++ b/operator/config/samples/production-stack_v1alpha_vllmrouter.yaml
@@ -15,6 +15,9 @@ spec:
   # Service discovery method (k8s or static)
   serviceDiscovery: k8s
 
+  # Label selector for vLLM runtime pods
+  k8sLabelSelector: "app=vllmruntime-sample"
+
   # Routing strategy (roundrobin or session)
   routingLogic: roundrobin
 
diff --git a/operator/config/samples/production-stack_v1alpha_vllmruntime.yaml b/operator/config/samples/production-stack_v1alpha_vllmruntime.yaml
index 7b8947a4b..a58a6c0ee 100644
--- a/operator/config/samples/production-stack_v1alpha_vllmruntime.yaml
+++ b/operator/config/samples/production-stack_v1alpha_vllmruntime.yaml
@@ -21,8 +21,8 @@ spec:
     enabled: true
     cpuOffloadingBufferSize: "15"
     diskOffloadingBufferSize: "8"
-    # remoteUrl: "lm://cacheserver-sample.default.svc.cluster.local:80"
-    # remoteSerde: "naive"
+    remoteUrl: "lm://cacheserver-sample.default.svc.cluster.local:80"
+    remoteSerde: "naive"
 
   # Model configuration
   model:
diff --git a/operator/internal/controller/cacheserver_controller.go b/operator/internal/controller/cacheserver_controller.go
index 1a534dd27..2f56ddf8f 100644
--- a/operator/internal/controller/cacheserver_controller.go
+++ b/operator/internal/controller/cacheserver_controller.go
@@ -224,9 +224,7 @@ func (r *CacheServerReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, cs
 
 // updateStatus updates the status of the CacheServer
 func (r *CacheServerReconciler) updateStatus(ctx context.Context, cs *productionstackv1alpha1.CacheServer, dep *appsv1.Deployment) error {
-	return retry.OnError(retry.DefaultRetry, func(err error) bool {
-		return errors.IsConflict(err)
-	}, func() error {
+	return retry.RetryOnConflict(retry.DefaultRetry, func() error {
 		// Get the latest version of the CacheServer
 		latestCS := &productionstackv1alpha1.CacheServer{}
 		if err := r.Get(ctx, types.NamespacedName{Name: cs.Name, Namespace: cs.Namespace}, latestCS); err != nil {
diff --git a/operator/internal/controller/vllmrouter_controller.go b/operator/internal/controller/vllmrouter_controller.go
index dc99ee433..00f90e1b9 100644
--- a/operator/internal/controller/vllmrouter_controller.go
+++ b/operator/internal/controller/vllmrouter_controller.go
@@ -208,6 +208,7 @@ func (r *VLLMRouterReconciler) deploymentForVLLMRouter(router *servingv1alpha1.V
 	if router.Spec.ServiceDiscovery == "k8s" {
 		args = append(args,
 			"--k8s-namespace", router.Namespace,
+			"--k8s-label-selector", router.Spec.K8sLabelSelector,
 		)
 	} else if router.Spec.ServiceDiscovery == "static" {
 		if router.Spec.StaticBackends == "" || router.Spec.StaticModels == "" {
@@ -324,9 +325,7 @@ func (r *VLLMRouterReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, rou
 
 // updateStatus updates the status of the VLLMRouter
 func (r *VLLMRouterReconciler) updateStatus(ctx context.Context, router *servingv1alpha1.VLLMRouter, dep *appsv1.Deployment) error {
-	return retry.OnError(retry.DefaultRetry, func(err error) bool {
-		return errors.IsConflict(err)
-	}, func() error {
+	return retry.RetryOnConflict(retry.DefaultRetry, func() error {
 		// Get the latest version of the VLLMRouter
 		latestRouter := &servingv1alpha1.VLLMRouter{}
 		if err := r.Get(ctx, types.NamespacedName{Name: router.Name, Namespace: router.Namespace}, latestRouter); err != nil {
diff --git a/operator/internal/controller/vllmruntime_controller.go b/operator/internal/controller/vllmruntime_controller.go
index a4937fc59..8725b9c15 100644
--- a/operator/internal/controller/vllmruntime_controller.go
+++ b/operator/internal/controller/vllmruntime_controller.go
@@ -392,6 +392,34 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production
 								},
 							},
 							Resources: resources,
+							ReadinessProbe: &corev1.Probe{
+								ProbeHandler: corev1.ProbeHandler{
+									HTTPGet: &corev1.HTTPGetAction{
+										Path:   "/health",
+										Port:   intstr.FromInt(int(vllmRuntime.Spec.Port)),
+										Scheme: corev1.URISchemeHTTP,
+									},
+								},
+								InitialDelaySeconds: 10,
+								PeriodSeconds:       5,
+								TimeoutSeconds:      3,
+								SuccessThreshold:    1,
+								FailureThreshold:    3,
+							},
+							LivenessProbe: &corev1.Probe{
+								ProbeHandler: corev1.ProbeHandler{
+									HTTPGet: &corev1.HTTPGetAction{
+										Path:   "/health",
+										Port:   intstr.FromInt(int(vllmRuntime.Spec.Port)),
+										Scheme: corev1.URISchemeHTTP,
+									},
+								},
+								InitialDelaySeconds: 30,
+								PeriodSeconds:       10,
+								TimeoutSeconds:      3,
+								SuccessThreshold:    1,
+								FailureThreshold:    3,
+							},
 						},
 					},
 				},
@@ -479,9 +507,7 @@ func (r *VLLMRuntimeReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, vr
 
 // updateStatus updates the status of the VLLMRuntime
 func (r *VLLMRuntimeReconciler) updateStatus(ctx context.Context, vr *productionstackv1alpha1.VLLMRuntime, dep *appsv1.Deployment) error {
-	return retry.OnError(retry.DefaultRetry, func(err error) bool {
-		return errors.IsConflict(err)
-	}, func() error {
+	return retry.RetryOnConflict(retry.DefaultRetry, func() error {
 		// Get the latest version of the VLLMRuntime
 		latestVR := &productionstackv1alpha1.VLLMRuntime{}
 		if err := r.Get(ctx, types.NamespacedName{Name: vr.Name, Namespace: vr.Namespace}, latestVR); err != nil {

From 56ce7f9a35c8b988e9dc8bf4e674511033b8f385 Mon Sep 17 00:00:00 2001
From: royyhuang <roy.y.huang@gmail.com>
Date: Tue, 13 May 2025 21:08:45 +0000
Subject: [PATCH 07/14] fix readiness probe

Signed-off-by: royyhuang <roy.y.huang@gmail.com>
---
 operator/config/manager/kustomization.yaml             |  2 +-
 operator/config/rbac/kustomization.yaml                | 10 +++++++---
 ....yaml => production-stack_v1alpha1_vllmrouter.yaml} |  0
 ...yaml => production-stack_v1alpha1_vllmruntime.yaml} |  2 +-
 operator/internal/controller/vllmruntime_controller.go | 10 +++++-----
 5 files changed, 14 insertions(+), 10 deletions(-)
 rename operator/config/samples/{production-stack_v1alpha_vllmrouter.yaml => production-stack_v1alpha1_vllmrouter.yaml} (100%)
 rename operator/config/samples/{production-stack_v1alpha_vllmruntime.yaml => production-stack_v1alpha1_vllmruntime.yaml} (97%)

diff --git a/operator/config/manager/kustomization.yaml b/operator/config/manager/kustomization.yaml
index f3af0a933..cc9f03e24 100644
--- a/operator/config/manager/kustomization.yaml
+++ b/operator/config/manager/kustomization.yaml
@@ -5,5 +5,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 images:
 - name: controller
-  newName: controller
+  newName: 1nfinity/production-stack-controller
   newTag: latest
diff --git a/operator/config/rbac/kustomization.yaml b/operator/config/rbac/kustomization.yaml
index e47dad1b5..57100cf7c 100644
--- a/operator/config/rbac/kustomization.yaml
+++ b/operator/config/rbac/kustomization.yaml
@@ -25,9 +25,13 @@ resources:
 - cacheserver_admin_role.yaml
 - cacheserver_editor_role.yaml
 - cacheserver_viewer_role.yaml
-- router_admin_role.yaml
-- router_editor_role.yaml
-- router_viewer_role.yaml
+- vllmrouter_admin_role.yaml
+- vllmrouter_editor_role.yaml
+- vllmrouter_viewer_role.yaml
 - vllmruntime_admin_role.yaml
 - vllmruntime_editor_role.yaml
 - vllmruntime_viewer_role.yaml
+# Pod viewer role is used to view pods for vllmrouter's service discovery
+- pod_viewer_role.yaml
+- vllmrouter_service_account.yaml
+- vllmrouter_role_binding.yaml
diff --git a/operator/config/samples/production-stack_v1alpha_vllmrouter.yaml b/operator/config/samples/production-stack_v1alpha1_vllmrouter.yaml
similarity index 100%
rename from operator/config/samples/production-stack_v1alpha_vllmrouter.yaml
rename to operator/config/samples/production-stack_v1alpha1_vllmrouter.yaml
diff --git a/operator/config/samples/production-stack_v1alpha_vllmruntime.yaml b/operator/config/samples/production-stack_v1alpha1_vllmruntime.yaml
similarity index 97%
rename from operator/config/samples/production-stack_v1alpha_vllmruntime.yaml
rename to operator/config/samples/production-stack_v1alpha1_vllmruntime.yaml
index a58a6c0ee..051f7b883 100644
--- a/operator/config/samples/production-stack_v1alpha_vllmruntime.yaml
+++ b/operator/config/samples/production-stack_v1alpha1_vllmruntime.yaml
@@ -20,7 +20,7 @@ spec:
   lmCacheConfig:
     enabled: true
     cpuOffloadingBufferSize: "15"
-    diskOffloadingBufferSize: "8"
+    diskOffloadingBufferSize: "0"
     remoteUrl: "lm://cacheserver-sample.default.svc.cluster.local:80"
     remoteSerde: "naive"
 
diff --git a/operator/internal/controller/vllmruntime_controller.go b/operator/internal/controller/vllmruntime_controller.go
index 8725b9c15..394d38bb1 100644
--- a/operator/internal/controller/vllmruntime_controller.go
+++ b/operator/internal/controller/vllmruntime_controller.go
@@ -400,11 +400,11 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production
 										Scheme: corev1.URISchemeHTTP,
 									},
 								},
-								InitialDelaySeconds: 10,
-								PeriodSeconds:       5,
-								TimeoutSeconds:      3,
+								InitialDelaySeconds: 30,
+								PeriodSeconds:       20,
+								TimeoutSeconds:      5,
 								SuccessThreshold:    1,
-								FailureThreshold:    3,
+								FailureThreshold:    10,
 							},
 							LivenessProbe: &corev1.Probe{
 								ProbeHandler: corev1.ProbeHandler{
@@ -414,7 +414,7 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production
 										Scheme: corev1.URISchemeHTTP,
 									},
 								},
-								InitialDelaySeconds: 30,
+								InitialDelaySeconds: 240,
 								PeriodSeconds:       10,
 								TimeoutSeconds:      3,
 								SuccessThreshold:    1,

From a28c5f1d17bee3b34ee148623372f5e3a2cf0d72 Mon Sep 17 00:00:00 2001
From: royyhuang <roy.y.huang@gmail.com>
Date: Mon, 2 Jun 2025 23:48:15 +0000
Subject: [PATCH 08/14] restructure runtime crd spec

Signed-off-by: royyhuang <roy.y.huang@gmail.com>
---
 .pre-commit-config.yaml                       |   1 +
 operator/api/v1alpha1/vllmruntime_types.go    |  58 ++++--
 .../api/v1alpha1/zz_generated.deepcopy.go     |  58 ++++--
 ...production-stack.vllm.ai_vllmruntimes.yaml | 197 +++++++++---------
 operator/config/rbac/pod_viewer_role.yaml     |  17 --
 .../config/rbac/vllmrouter_role_binding.yaml  |  33 ++-
 .../rbac/vllmrouter_service_account.yaml      |   8 -
 ...production-stack_v1alpha1_vllmruntime.yaml |  82 ++++----
 .../controller/vllmruntime_controller.go      | 153 +++++++-------
 9 files changed, 340 insertions(+), 267 deletions(-)
 delete mode 100644 operator/config/rbac/pod_viewer_role.yaml
 delete mode 100644 operator/config/rbac/vllmrouter_service_account.yaml

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index dd8a473ca..207012987 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -9,6 +9,7 @@ repos:
   - id: check-json
   - id: check-toml
   - id: check-yaml
+    args: ["--allow-multiple-documents"]
     exclude: |
       (?x)(
           ^helm/templates/|
diff --git a/operator/api/v1alpha1/vllmruntime_types.go b/operator/api/v1alpha1/vllmruntime_types.go
index 1ce19c2b7..6ac49e1fe 100644
--- a/operator/api/v1alpha1/vllmruntime_types.go
+++ b/operator/api/v1alpha1/vllmruntime_types.go
@@ -24,11 +24,41 @@ import (
 // EDIT THIS FILE!  THIS IS SCAFFOLDING FOR YOU TO OWN!
 // NOTE: json tags are required.  Any new fields you add must have json tags for the fields to be serialized.
 
+// DeploymentConfig defines the deployment configuration: replica count, deploy strategy, resource requirements, and image.
+type DeploymentConfig struct {
+	// Replicas
+	// +kubebuilder:default=1
+	Replicas int32 `json:"replicas,omitempty"`
+
+	// Deploy strategy
+	// +kubebuilder:validation:Enum=RollingUpdate;Recreate
+	// +kubebuilder:default=RollingUpdate
+	DeployStrategy string `json:"deploymentStrategy,omitempty"`
+
+	// Resource requirements
+	Resources ResourceRequirements `json:"resources"`
+
+	// Image configuration
+	Image ImageSpec `json:"image"`
+}
+
 // VLLMRuntimeSpec defines the desired state of VLLMRuntime
 type VLLMRuntimeSpec struct {
 	// Model configuration
 	Model ModelSpec `json:"model"`
 
+	// vLLM server configuration
+	VLLMConfig VLLMConfig `json:"vllmConfig"`
+
+	// LM Cache configuration
+	LMCacheConfig LMCacheConfig `json:"lmCacheConfig,omitempty"`
+
+	// Deployment configuration
+	DeploymentConfig DeploymentConfig `json:"deploymentConfig"`
+}
+
+// VLLMConfig defines the vLLM server configuration
+type VLLMConfig struct {
 	// Enable chunked prefill
 	EnableChunkedPrefill bool `json:"enableChunkedPrefill,omitempty"`
 
@@ -44,9 +74,6 @@ type VLLMRuntimeSpec struct {
 	// Maximum number of LoRAs
 	MaxLoras int32 `json:"maxLoras,omitempty"`
 
-	// LM Cache configuration
-	LMCacheConfig LMCacheConfig `json:"lmCacheConfig,omitempty"`
-
 	// Extra arguments for vllm serve
 	ExtraArgs []string `json:"extraArgs,omitempty"`
 
@@ -59,12 +86,12 @@ type VLLMRuntimeSpec struct {
 
 	// Environment variables
 	Env []EnvVar `json:"env,omitempty"`
+}
 
-	// Resource requirements
-	Resources ResourceRequirements `json:"resources"`
-
-	// Image configuration
-	Image ImageSpec `json:"image"`
+// ModelSpec defines the model configuration
+type ModelSpec struct {
+	// Model URL
+	ModelURL string `json:"modelURL"`
 
 	// HuggingFace token secret
 	HFTokenSecret corev1.LocalObjectReference `json:"hfTokenSecret,omitempty"`
@@ -72,21 +99,6 @@ type VLLMRuntimeSpec struct {
 	// +kubebuilder:validation:RequiredWhen=HFTokenSecret.Name!=""
 	HFTokenName string `json:"hfTokenName,omitempty"`
 
-	// Replicas
-	// +kubebuilder:default=1
-	Replicas int32 `json:"replicas,omitempty"`
-
-	// Deploy strategy
-	// +kubebuilder:validation:Enum=RollingUpdate;Recreate
-	// +kubebuilder:default=RollingUpdate
-	DeployStrategy string `json:"deploymentStrategy,omitempty"`
-}
-
-// ModelSpec defines the model configuration
-type ModelSpec struct {
-	// Model URL
-	ModelURL string `json:"modelURL"`
-
 	// Enable LoRA
 	EnableLoRA bool `json:"enableLoRA,omitempty"`
 
diff --git a/operator/api/v1alpha1/zz_generated.deepcopy.go b/operator/api/v1alpha1/zz_generated.deepcopy.go
index 21bbc16e8..cfacfb173 100644
--- a/operator/api/v1alpha1/zz_generated.deepcopy.go
+++ b/operator/api/v1alpha1/zz_generated.deepcopy.go
@@ -117,6 +117,23 @@ func (in *CacheServerStatus) DeepCopy() *CacheServerStatus {
 	return out
 }
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *DeploymentConfig) DeepCopyInto(out *DeploymentConfig) {
+	*out = *in
+	out.Resources = in.Resources
+	out.Image = in.Image
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DeploymentConfig.
+func (in *DeploymentConfig) DeepCopy() *DeploymentConfig {
+	if in == nil {
+		return nil
+	}
+	out := new(DeploymentConfig)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *EnvVar) DeepCopyInto(out *EnvVar) {
 	*out = *in
@@ -165,6 +182,7 @@ func (in *LMCacheConfig) DeepCopy() *LMCacheConfig {
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *ModelSpec) DeepCopyInto(out *ModelSpec) {
 	*out = *in
+	out.HFTokenSecret = in.HFTokenSecret
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelSpec.
@@ -192,6 +210,31 @@ func (in *ResourceRequirements) DeepCopy() *ResourceRequirements {
 	return out
 }
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *VLLMConfig) DeepCopyInto(out *VLLMConfig) {
+	*out = *in
+	if in.ExtraArgs != nil {
+		in, out := &in.ExtraArgs, &out.ExtraArgs
+		*out = make([]string, len(*in))
+		copy(*out, *in)
+	}
+	if in.Env != nil {
+		in, out := &in.Env, &out.Env
+		*out = make([]EnvVar, len(*in))
+		copy(*out, *in)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VLLMConfig.
+func (in *VLLMConfig) DeepCopy() *VLLMConfig {
+	if in == nil {
+		return nil
+	}
+	out := new(VLLMConfig)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *VLLMRouter) DeepCopyInto(out *VLLMRouter) {
 	*out = *in
@@ -365,20 +408,9 @@ func (in *VLLMRuntimeList) DeepCopyObject() runtime.Object {
 func (in *VLLMRuntimeSpec) DeepCopyInto(out *VLLMRuntimeSpec) {
 	*out = *in
 	out.Model = in.Model
+	in.VLLMConfig.DeepCopyInto(&out.VLLMConfig)
 	out.LMCacheConfig = in.LMCacheConfig
-	if in.ExtraArgs != nil {
-		in, out := &in.ExtraArgs, &out.ExtraArgs
-		*out = make([]string, len(*in))
-		copy(*out, *in)
-	}
-	if in.Env != nil {
-		in, out := &in.Env, &out.Env
-		*out = make([]EnvVar, len(*in))
-		copy(*out, *in)
-	}
-	out.Resources = in.Resources
-	out.Image = in.Image
-	out.HFTokenSecret = in.HFTokenSecret
+	out.DeploymentConfig = in.DeploymentConfig
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VLLMRuntimeSpec.
diff --git a/operator/config/crd/bases/production-stack.vllm.ai_vllmruntimes.yaml b/operator/config/crd/bases/production-stack.vllm.ai_vllmruntimes.yaml
index c3f21d2ac..44cc896cd 100644
--- a/operator/config/crd/bases/production-stack.vllm.ai_vllmruntimes.yaml
+++ b/operator/config/crd/bases/production-stack.vllm.ai_vllmruntimes.yaml
@@ -41,72 +41,49 @@ spec:
           spec:
             description: VLLMRuntimeSpec defines the desired state of VLLMRuntime
             properties:
-              deploymentStrategy:
-                default: RollingUpdate
-                description: Deploy strategy
-                enum:
-                - RollingUpdate
-                - Recreate
-                type: string
-              enableChunkedPrefill:
-                description: Enable chunked prefill
-                type: boolean
-              enablePrefixCaching:
-                description: Enable prefix caching
-                type: boolean
-              env:
-                description: Environment variables
-                items:
-                  description: EnvVar represents an environment variable
-                  properties:
-                    name:
-                      type: string
-                    value:
-                      type: string
-                  required:
-                  - name
-                  - value
-                  type: object
-                type: array
-              extraArgs:
-                description: Extra arguments for vllm serve
-                items:
-                  type: string
-                type: array
-              gpuMemoryUtilization:
-                description: GPU memory utilization
-                type: string
-              hfTokenName:
-                default: token
-                type: string
-              hfTokenSecret:
-                description: HuggingFace token secret
+              deploymentConfig:
+                description: Deployment configuration
                 properties:
-                  name:
-                    default: ""
-                    description: |-
-                      Name of the referent.
-                      This field is effectively required, but due to backwards compatibility is
-                      allowed to be empty. Instances of this type with an empty value here are
-                      almost certainly wrong.
-                      More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
-                    type: string
-                type: object
-                x-kubernetes-map-type: atomic
-              image:
-                description: Image configuration
-                properties:
-                  name:
-                    type: string
-                  pullPolicy:
-                    type: string
-                  pullSecretName:
-                    type: string
-                  registry:
+                  deploymentStrategy:
+                    default: RollingUpdate
+                    description: Deploy strategy
+                    enum:
+                    - RollingUpdate
+                    - Recreate
                     type: string
+                  image:
+                    description: Image configuration
+                    properties:
+                      name:
+                        type: string
+                      pullPolicy:
+                        type: string
+                      pullSecretName:
+                        type: string
+                      registry:
+                        type: string
+                    required:
+                    - name
+                    - registry
+                    type: object
+                  replicas:
+                    default: 1
+                    description: Replicas
+                    format: int32
+                    type: integer
+                  resources:
+                    description: Resource requirements
+                    properties:
+                      cpu:
+                        type: string
+                      gpu:
+                        type: string
+                      memory:
+                        type: string
+                    type: object
                 required:
-                - name
-                - registry
+                - image
+                - resources
                 type: object
               lmCacheConfig:
                 description: LM Cache configuration
@@ -133,10 +110,6 @@ spec:
                     description: RemoteURL is the URL of the remote cache server
                     type: string
                 type: object
-              maxLoras:
-                description: Maximum number of LoRAs
-                format: int32
-                type: integer
               model:
                 description: Model configuration
                 properties:
@@ -149,6 +122,23 @@ spec:
                   enableTool:
                     description: Enable tool
                     type: boolean
+                  hfTokenName:
+                    default: token
+                    type: string
+                  hfTokenSecret:
+                    description: HuggingFace token secret
+                    properties:
+                      name:
+                        default: ""
+                        description: |-
+                          Name of the referent.
+                          This field is effectively required, but due to backwards compatibility is
+                          allowed to be empty. Instances of this type with an empty value here are
+                          almost certainly wrong.
+                          More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+                        type: string
+                    type: object
+                    x-kubernetes-map-type: atomic
                   maxModelLen:
                     description: Maximum model length
                     format: int32
@@ -166,37 +156,58 @@ spec:
                 required:
                 - modelURL
                 type: object
-              port:
-                default: 8000
-                description: Port for vLLM server
-                format: int32
-                type: integer
-              replicas:
-                default: 1
-                description: Replicas
-                format: int32
-                type: integer
-              resources:
-                description: Resource requirements
+              vllmConfig:
+                description: vLLM server configuration
                 properties:
-                  cpu:
-                    type: string
-                  gpu:
-                    type: string
-                  memory:
+                  enableChunkedPrefill:
+                    description: Enable chunked prefill
+                    type: boolean
+                  enablePrefixCaching:
+                    description: Enable prefix caching
+                    type: boolean
+                  env:
+                    description: Environment variables
+                    items:
+                      description: EnvVar represents an environment variable
+                      properties:
+                        name:
+                          type: string
+                        value:
+                          type: string
+                      required:
+                      - name
+                      - value
+                      type: object
+                    type: array
+                  extraArgs:
+                    description: Extra arguments for vllm serve
+                    items:
+                      type: string
+                    type: array
+                  gpuMemoryUtilization:
+                    description: GPU memory utilization
                     type: string
+                  maxLoras:
+                    description: Maximum number of LoRAs
+                    format: int32
+                    type: integer
+                  port:
+                    default: 8000
+                    description: Port for vLLM server
+                    format: int32
+                    type: integer
+                  tensorParallelSize:
+                    description: Tensor parallel size
+                    format: int32
+                    type: integer
+                  v1:
+                    description: Use V1 API
+                    type: boolean
                 type: object
-              tensorParallelSize:
-                description: Tensor parallel size
-                format: int32
-                type: integer
-              v1:
-                description: Use V1 API
-                type: boolean
             required:
-            - image
+            - deploymentConfig
             - model
-            - resources
+            - vllmConfig
             type: object
           status:
             description: VLLMRuntimeStatus defines the observed state of VLLMRuntime
diff --git a/operator/config/rbac/pod_viewer_role.yaml b/operator/config/rbac/pod_viewer_role.yaml
deleted file mode 100644
index b94a22369..000000000
--- a/operator/config/rbac/pod_viewer_role.yaml
+++ /dev/null
@@ -1,17 +0,0 @@
-apiVersion: rbac.authorization.k8s.io/v1
-kind: Role
-metadata:
-  name: pod-viewer-role
-  namespace: default
-  labels:
-    app.kubernetes.io/name: production-stack
-    app.kubernetes.io/managed-by: kustomize
-rules:
-- apiGroups:
-  - ""
-  resources:
-  - pods
-  verbs:
-  - get
-  - list
-  - watch
diff --git a/operator/config/rbac/vllmrouter_role_binding.yaml b/operator/config/rbac/vllmrouter_role_binding.yaml
index a29c577c8..2807c765e 100644
--- a/operator/config/rbac/vllmrouter_role_binding.yaml
+++ b/operator/config/rbac/vllmrouter_role_binding.yaml
@@ -1,4 +1,31 @@
 apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: pod-viewer-role
+  namespace: default
+  labels:
+    app.kubernetes.io/name: production-stack
+    app.kubernetes.io/managed-by: kustomize
+rules:
+  - apiGroups:
+      - ""
+    resources:
+      - pods
+    verbs:
+      - get
+      - list
+      - watch
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: vllmrouter-sa
+  namespace: default
+  labels:
+    app.kubernetes.io/name: production-stack
+    app.kubernetes.io/managed-by: kustomize
+---
+apiVersion: rbac.authorization.k8s.io/v1
 kind: RoleBinding
 metadata:
   name: pod-viewer-binding
@@ -7,9 +34,9 @@ metadata:
     app.kubernetes.io/name: production-stack
     app.kubernetes.io/managed-by: kustomize
 subjects:
-- kind: ServiceAccount
-  name: vllmrouter-sa
-  namespace: default
+  - kind: ServiceAccount
+    name: vllmrouter-sa
+    namespace: default
 roleRef:
   kind: Role
   name: pod-viewer-role
diff --git a/operator/config/rbac/vllmrouter_service_account.yaml b/operator/config/rbac/vllmrouter_service_account.yaml
deleted file mode 100644
index 4bb14d72b..000000000
--- a/operator/config/rbac/vllmrouter_service_account.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-apiVersion: v1
-kind: ServiceAccount
-metadata:
-  name: vllmrouter-sa
-  namespace: default
-  labels:
-    app.kubernetes.io/name: production-stack
-    app.kubernetes.io/managed-by: kustomize
diff --git a/operator/config/samples/production-stack_v1alpha1_vllmruntime.yaml b/operator/config/samples/production-stack_v1alpha1_vllmruntime.yaml
index 051f7b883..2d2ae3c2a 100644
--- a/operator/config/samples/production-stack_v1alpha1_vllmruntime.yaml
+++ b/operator/config/samples/production-stack_v1alpha1_vllmruntime.yaml
@@ -7,23 +7,6 @@ metadata:
   name: vllmruntime-sample
 spec:
 
-  # vLLM specific configurations
-  enableChunkedPrefill: false
-  enablePrefixCaching: false
-  tensorParallelSize: 1
-  gpuMemoryUtilization: "0.8"
-  maxLoras: 4
-  extraArgs: ["--disable-log-requests"]
-  v1: true
-
-  # LM Cache configuration
-  lmCacheConfig:
-    enabled: true
-    cpuOffloadingBufferSize: "15"
-    diskOffloadingBufferSize: "0"
-    remoteUrl: "lm://cacheserver-sample.default.svc.cluster.local:80"
-    remoteSerde: "naive"
-
   # Model configuration
   model:
     modelURL: "meta-llama/Llama-3.1-8B"
@@ -33,31 +16,52 @@ spec:
     maxModelLen: 4096
     dtype: "bfloat16"
     maxNumSeqs: 32
+    # HuggingFace token secret (optional); hfTokenName is the key within that secret
+    hfTokenSecret:
+      name: "huggingface-token"
+    hfTokenName: "token"
 
-  # Environment variables
-  env:
-    - name: HF_HOME
-      value: "/data"
+  # vLLM server configuration
+  vllmConfig:
+    # vLLM specific configurations
+    enableChunkedPrefill: false
+    enablePrefixCaching: false
+    tensorParallelSize: 1
+    gpuMemoryUtilization: "0.8"
+    maxLoras: 4
+    extraArgs: ["--disable-log-requests"]
+    v1: true
+    port: 8000
+    # Environment variables
+    env:
+      - name: HF_HOME
+        value: "/data"
 
-  # Resource requirements
-  resources:
-    cpu: "10"
-    memory: "32Gi"
-    gpu: "1"
+  # LM Cache configuration
+  lmCacheConfig:
+    enabled: true
+    cpuOffloadingBufferSize: "15"
+    diskOffloadingBufferSize: "0"
+    remoteUrl: "lm://cacheserver-sample.default.svc.cluster.local:80"
+    remoteSerde: "naive"
 
-  # Image configuration
-  image:
-    registry: "docker.io"
-    name: "lmcache/vllm-openai:2025-04-18"
-    pullPolicy: "IfNotPresent"
-    pullSecretName: ""
+  # Deployment configuration
+  deploymentConfig:
+    # Resource requirements
+    resources:
+      cpu: "10"
+      memory: "32Gi"
+      gpu: "1"
 
-  # HuggingFace token secret (optional)
-  hfTokenSecret:
-    name: "huggingface-token"
+    # Image configuration
+    image:
+      registry: "docker.io"
+      name: "lmcache/vllm-openai:2025-04-18"
+      pullPolicy: "IfNotPresent"
+      pullSecretName: ""
 
-  # Number of replicas
-  replicas: 1
+    # Number of replicas
+    replicas: 1
 
-  # Deployment strategy
-  deploymentStrategy: "Recreate"
+    # Deployment strategy
+    deploymentStrategy: "Recreate"
diff --git a/operator/internal/controller/vllmruntime_controller.go b/operator/internal/controller/vllmruntime_controller.go
index 394d38bb1..41d46ec38 100644
--- a/operator/internal/controller/vllmruntime_controller.go
+++ b/operator/internal/controller/vllmruntime_controller.go
@@ -125,7 +125,7 @@ func (r *VLLMRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 	}
 
 	// Update the deployment if needed
-	if r.deploymentNeedsUpdate(found, vllmRuntime) {
+	if r.deploymentNeedsUpdate(ctx, found, vllmRuntime) {
 		log.Info("Updating Deployment", "Deployment.Namespace", found.Namespace, "Deployment.Name", found.Name)
 		// Create new deployment spec
 		newDep := r.deploymentForVLLMRuntime(vllmRuntime)
@@ -154,14 +154,44 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production
 		"app": vllmRuntime.Name,
 	}
 
+	// Define probes
+	readinessProbe := &corev1.Probe{
+		ProbeHandler: corev1.ProbeHandler{
+			HTTPGet: &corev1.HTTPGetAction{
+				Path:   "/health",
+				Port:   intstr.FromInt(int(vllmRuntime.Spec.VLLMConfig.Port)),
+				Scheme: corev1.URISchemeHTTP,
+			},
+		},
+		InitialDelaySeconds: 30,
+		PeriodSeconds:       20,
+		TimeoutSeconds:      5,
+		SuccessThreshold:    1,
+		FailureThreshold:    10,
+	}
+
+	livenessProbe := &corev1.Probe{
+		ProbeHandler: corev1.ProbeHandler{
+			HTTPGet: &corev1.HTTPGetAction{
+				Path:   "/health",
+				Port:   intstr.FromInt(int(vllmRuntime.Spec.VLLMConfig.Port)),
+				Scheme: corev1.URISchemeHTTP,
+			},
+		},
+		InitialDelaySeconds: 240,
+		PeriodSeconds:       10,
+		TimeoutSeconds:      3,
+		SuccessThreshold:    1,
+		FailureThreshold:    3,
+	}
+
 	// Build command line arguments
 	args := []string{
-		"--model",
 		vllmRuntime.Spec.Model.ModelURL,
 		"--host",
 		"0.0.0.0",
 		"--port",
-		fmt.Sprintf("%d", vllmRuntime.Spec.Port),
+		fmt.Sprintf("%d", vllmRuntime.Spec.VLLMConfig.Port),
 	}
 
 	if vllmRuntime.Spec.Model.EnableLoRA {
@@ -176,13 +206,13 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production
 		args = append(args, "--tool-call-parser", vllmRuntime.Spec.Model.ToolCallParser)
 	}
 
-	if vllmRuntime.Spec.EnableChunkedPrefill {
+	if vllmRuntime.Spec.VLLMConfig.EnableChunkedPrefill {
 		args = append(args, "--enable-chunked-prefill")
 	} else {
 		args = append(args, "--no-enable-chunked-prefill")
 	}
 
-	if vllmRuntime.Spec.EnablePrefixCaching {
+	if vllmRuntime.Spec.VLLMConfig.EnablePrefixCaching {
 		args = append(args, "--enable-prefix-caching")
 	} else {
 		args = append(args, "--no-enable-prefix-caching")
@@ -196,29 +226,29 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production
 		args = append(args, "--dtype", vllmRuntime.Spec.Model.DType)
 	}
 
-	if vllmRuntime.Spec.TensorParallelSize > 0 {
-		args = append(args, "--tensor-parallel-size", fmt.Sprintf("%d", vllmRuntime.Spec.TensorParallelSize))
+	if vllmRuntime.Spec.VLLMConfig.TensorParallelSize > 0 {
+		args = append(args, "--tensor-parallel-size", fmt.Sprintf("%d", vllmRuntime.Spec.VLLMConfig.TensorParallelSize))
 	}
 
 	if vllmRuntime.Spec.Model.MaxNumSeqs > 0 {
 		args = append(args, "--max-num-seqs", fmt.Sprintf("%d", vllmRuntime.Spec.Model.MaxNumSeqs))
 	}
 
-	if vllmRuntime.Spec.GpuMemoryUtilization != "" {
-		args = append(args, "--gpu_memory_utilization", vllmRuntime.Spec.GpuMemoryUtilization)
+	if vllmRuntime.Spec.VLLMConfig.GpuMemoryUtilization != "" {
+		args = append(args, "--gpu_memory_utilization", vllmRuntime.Spec.VLLMConfig.GpuMemoryUtilization)
 	}
 
-	if vllmRuntime.Spec.MaxLoras > 0 {
-		args = append(args, "--max_loras", fmt.Sprintf("%d", vllmRuntime.Spec.MaxLoras))
+	if vllmRuntime.Spec.VLLMConfig.MaxLoras > 0 {
+		args = append(args, "--max_loras", fmt.Sprintf("%d", vllmRuntime.Spec.VLLMConfig.MaxLoras))
 	}
 
-	if vllmRuntime.Spec.ExtraArgs != nil {
-		args = append(args, vllmRuntime.Spec.ExtraArgs...)
+	if vllmRuntime.Spec.VLLMConfig.ExtraArgs != nil {
+		args = append(args, vllmRuntime.Spec.VLLMConfig.ExtraArgs...)
 	}
 
 	// Build environment variables
 	env := []corev1.EnvVar{}
-	if vllmRuntime.Spec.V1 {
+	if vllmRuntime.Spec.VLLMConfig.V1 {
 		env = append(env, corev1.EnvVar{
 			Name:  "VLLM_USE_V1",
 			Value: "1",
@@ -249,7 +279,7 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production
 
 		// Add KV transfer config based on V1 flag
 		var lmcache_config string
-		if vllmRuntime.Spec.V1 {
+		if vllmRuntime.Spec.VLLMConfig.V1 {
 			lmcache_config = `{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_both"}`
 		} else {
 			lmcache_config = `{"kv_connector":"LMCacheConnector","kv_role":"kv_both"}`
@@ -297,8 +327,8 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production
 	}
 
 	// Add user-defined environment variables
-	if vllmRuntime.Spec.Env != nil {
-		for _, e := range vllmRuntime.Spec.Env {
+	if vllmRuntime.Spec.VLLMConfig.Env != nil {
+		for _, e := range vllmRuntime.Spec.VLLMConfig.Env {
 			env = append(env, corev1.EnvVar{
 				Name:  e.Name,
 				Value: e.Value,
@@ -312,47 +342,47 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production
 		Limits:   corev1.ResourceList{},
 	}
 
-	if vllmRuntime.Spec.Resources.CPU != "" {
-		resources.Requests[corev1.ResourceCPU] = resource.MustParse(vllmRuntime.Spec.Resources.CPU)
-		resources.Limits[corev1.ResourceCPU] = resource.MustParse(vllmRuntime.Spec.Resources.CPU)
+	if vllmRuntime.Spec.DeploymentConfig.Resources.CPU != "" {
+		resources.Requests[corev1.ResourceCPU] = resource.MustParse(vllmRuntime.Spec.DeploymentConfig.Resources.CPU)
+		resources.Limits[corev1.ResourceCPU] = resource.MustParse(vllmRuntime.Spec.DeploymentConfig.Resources.CPU)
 	}
 
-	if vllmRuntime.Spec.Resources.Memory != "" {
-		resources.Requests[corev1.ResourceMemory] = resource.MustParse(vllmRuntime.Spec.Resources.Memory)
-		resources.Limits[corev1.ResourceMemory] = resource.MustParse(vllmRuntime.Spec.Resources.Memory)
+	if vllmRuntime.Spec.DeploymentConfig.Resources.Memory != "" {
+		resources.Requests[corev1.ResourceMemory] = resource.MustParse(vllmRuntime.Spec.DeploymentConfig.Resources.Memory)
+		resources.Limits[corev1.ResourceMemory] = resource.MustParse(vllmRuntime.Spec.DeploymentConfig.Resources.Memory)
 	}
 
-	if vllmRuntime.Spec.Resources.GPU != "" {
+	if vllmRuntime.Spec.DeploymentConfig.Resources.GPU != "" {
 		// Parse GPU resource as a decimal value
-		gpuResource := resource.MustParse(vllmRuntime.Spec.Resources.GPU)
+		gpuResource := resource.MustParse(vllmRuntime.Spec.DeploymentConfig.Resources.GPU)
 		resources.Requests["nvidia.com/gpu"] = gpuResource
 		resources.Limits["nvidia.com/gpu"] = gpuResource
 	}
 
 	// Get the image from Image spec or use default
-	image := vllmRuntime.Spec.Image.Registry + "/" + vllmRuntime.Spec.Image.Name
+	image := vllmRuntime.Spec.DeploymentConfig.Image.Registry + "/" + vllmRuntime.Spec.DeploymentConfig.Image.Name
 
 	// Get the image pull policy
 	imagePullPolicy := corev1.PullIfNotPresent
-	if vllmRuntime.Spec.Image.PullPolicy != "" {
-		imagePullPolicy = corev1.PullPolicy(vllmRuntime.Spec.Image.PullPolicy)
+	if vllmRuntime.Spec.DeploymentConfig.Image.PullPolicy != "" {
+		imagePullPolicy = corev1.PullPolicy(vllmRuntime.Spec.DeploymentConfig.Image.PullPolicy)
 	}
 
 	// Build image pull secrets
 	var imagePullSecrets []corev1.LocalObjectReference
-	if vllmRuntime.Spec.Image.PullSecretName != "" {
+	if vllmRuntime.Spec.DeploymentConfig.Image.PullSecretName != "" {
 		imagePullSecrets = append(imagePullSecrets, corev1.LocalObjectReference{
-			Name: vllmRuntime.Spec.Image.PullSecretName,
+			Name: vllmRuntime.Spec.DeploymentConfig.Image.PullSecretName,
 		})
 	}
 
-	if vllmRuntime.Spec.HFTokenSecret.Name != "" {
+	if vllmRuntime.Spec.Model.HFTokenSecret.Name != "" {
 		env = append(env, corev1.EnvVar{
 			Name: "HF_TOKEN",
 			ValueFrom: &corev1.EnvVarSource{
 				SecretKeyRef: &corev1.SecretKeySelector{
-					LocalObjectReference: vllmRuntime.Spec.HFTokenSecret,
-					Key:                  vllmRuntime.Spec.HFTokenName,
+					LocalObjectReference: vllmRuntime.Spec.Model.HFTokenSecret,
+					Key:                  vllmRuntime.Spec.Model.HFTokenName,
 				},
 			},
 		})
@@ -364,9 +394,9 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production
 			Namespace: vllmRuntime.Namespace,
 		},
 		Spec: appsv1.DeploymentSpec{
-			Replicas: &vllmRuntime.Spec.Replicas,
+			Replicas: &vllmRuntime.Spec.DeploymentConfig.Replicas,
 			Strategy: appsv1.DeploymentStrategy{
-				Type: appsv1.DeploymentStrategyType(vllmRuntime.Spec.DeployStrategy),
+				Type: appsv1.DeploymentStrategyType(vllmRuntime.Spec.DeploymentConfig.DeployStrategy),
 			},
 			Selector: &metav1.LabelSelector{
 				MatchLabels: labels,
@@ -382,44 +412,18 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production
 							Name:            "vllm",
 							Image:           image,
 							ImagePullPolicy: imagePullPolicy,
-							Command:         []string{"python3", "-m", "vllm.entrypoints.openai.api_server"},
+							Command:         []string{"/opt/venv/bin/vllm", "serve"},
 							Args:            args,
 							Env:             env,
 							Ports: []corev1.ContainerPort{
 								{
 									Name:          "http",
-									ContainerPort: vllmRuntime.Spec.Port,
+									ContainerPort: vllmRuntime.Spec.VLLMConfig.Port,
 								},
 							},
-							Resources: resources,
-							ReadinessProbe: &corev1.Probe{
-								ProbeHandler: corev1.ProbeHandler{
-									HTTPGet: &corev1.HTTPGetAction{
-										Path:   "/health",
-										Port:   intstr.FromInt(int(vllmRuntime.Spec.Port)),
-										Scheme: corev1.URISchemeHTTP,
-									},
-								},
-								InitialDelaySeconds: 30,
-								PeriodSeconds:       20,
-								TimeoutSeconds:      5,
-								SuccessThreshold:    1,
-								FailureThreshold:    10,
-							},
-							LivenessProbe: &corev1.Probe{
-								ProbeHandler: corev1.ProbeHandler{
-									HTTPGet: &corev1.HTTPGetAction{
-										Path:   "/health",
-										Port:   intstr.FromInt(int(vllmRuntime.Spec.Port)),
-										Scheme: corev1.URISchemeHTTP,
-									},
-								},
-								InitialDelaySeconds: 240,
-								PeriodSeconds:       10,
-								TimeoutSeconds:      3,
-								SuccessThreshold:    1,
-								FailureThreshold:    3,
-							},
+							Resources:      resources,
+							ReadinessProbe: readinessProbe,
+							LivenessProbe:  livenessProbe,
 						},
 					},
 				},
@@ -433,7 +437,9 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production
 }
 
 // deploymentNeedsUpdate checks if the deployment needs to be updated
-func (r *VLLMRuntimeReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, vr *productionstackv1alpha1.VLLMRuntime) bool {
+func (r *VLLMRuntimeReconciler) deploymentNeedsUpdate(ctx context.Context, dep *appsv1.Deployment, vr *productionstackv1alpha1.VLLMRuntime) bool {
+
+	log := log.FromContext(ctx)
 	// Generate the expected deployment
 	expectedDep := r.deploymentForVLLMRuntime(vr)
 
@@ -442,21 +448,24 @@ func (r *VLLMRuntimeReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, vr
 	actualModelURL := ""
 	// For vllm serve, the model URL is the first argument after the command
 	if len(dep.Spec.Template.Spec.Containers[0].Args) > 0 {
-		actualModelURL = dep.Spec.Template.Spec.Containers[0].Args[1]
+		actualModelURL = dep.Spec.Template.Spec.Containers[0].Args[0]
 	}
 	if expectedModelURL != actualModelURL {
+		log.Info("Model URL mismatch", "expected", expectedModelURL, "actual", actualModelURL)
 		return true
 	}
 
 	// Compare port
-	expectedPort := vr.Spec.Port
+	expectedPort := vr.Spec.VLLMConfig.Port
 	actualPort := dep.Spec.Template.Spec.Containers[0].Ports[0].ContainerPort
 	if expectedPort != actualPort {
+		log.Info("Port mismatch", "expected", expectedPort, "actual", actualPort)
 		return true
 	}
 
 	// Compare image
 	if expectedDep.Spec.Template.Spec.Containers[0].Image != dep.Spec.Template.Spec.Containers[0].Image {
+		log.Info("Image mismatch", "expected", expectedDep.Spec.Template.Spec.Containers[0].Image, "actual", dep.Spec.Template.Spec.Containers[0].Image)
 		return true
 	}
 
@@ -464,6 +473,7 @@ func (r *VLLMRuntimeReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, vr
 	expectedResources := expectedDep.Spec.Template.Spec.Containers[0].Resources
 	actualResources := dep.Spec.Template.Spec.Containers[0].Resources
 	if !reflect.DeepEqual(expectedResources, actualResources) {
+		log.Info("Resources mismatch", "expected", expectedResources, "actual", actualResources)
 		return true
 	}
 
@@ -499,6 +509,7 @@ func (r *VLLMRuntimeReconciler) deploymentNeedsUpdate(dep *appsv1.Deployment, vr
 		expectedLMCacheConfig.DiskOffloadingBufferSize != actualDiskOffloadingBufferSize ||
 		expectedLMCacheConfig.RemoteURL != actualRemoteURL ||
 		expectedLMCacheConfig.RemoteSerde != actualRemoteSerde {
+		log.Info("LM Cache configuration mismatch", "expected", expectedLMCacheConfig, "actual", actualLMCacheConfig)
 		return true
 	}
 
@@ -549,7 +560,7 @@ func (r *VLLMRuntimeReconciler) serviceForVLLMRuntime(vllmRuntime *productionsta
 				{
 					Name:       "http",
 					Port:       80,
-					TargetPort: intstr.FromInt(int(vllmRuntime.Spec.Port)),
+					TargetPort: intstr.FromInt(int(vllmRuntime.Spec.VLLMConfig.Port)),
 					Protocol:   corev1.ProtocolTCP,
 				},
 			},
@@ -564,7 +575,7 @@ func (r *VLLMRuntimeReconciler) serviceForVLLMRuntime(vllmRuntime *productionsta
 // serviceNeedsUpdate checks if the service needs to be updated
 func (r *VLLMRuntimeReconciler) serviceNeedsUpdate(svc *corev1.Service, vr *productionstackv1alpha1.VLLMRuntime) bool {
 	// Compare target port
-	expectedTargetPort := int(vr.Spec.Port)
+	expectedTargetPort := int(vr.Spec.VLLMConfig.Port)
 	actualTargetPort := svc.Spec.Ports[0].TargetPort.IntValue()
 	if expectedTargetPort != actualTargetPort {
 		return true

From 622ea3048bb5f9bcec01345af97b6601197d3a9f Mon Sep 17 00:00:00 2001
From: royyhuang <roy.y.huang@gmail.com>
Date: Fri, 6 Jun 2025 04:15:50 +0000
Subject: [PATCH 09/14] add default operator manifest

Signed-off-by: royyhuang <roy.y.huang@gmail.com>
---
 operator/config/manager/kustomization.yaml |   10 +-
 operator/config/rbac/kustomization.yaml    |    2 -
 operator/default.yaml                      | 1471 ++++++++++++++++++++
 3 files changed, 1476 insertions(+), 7 deletions(-)
 create mode 100644 operator/default.yaml

diff --git a/operator/config/manager/kustomization.yaml b/operator/config/manager/kustomization.yaml
index 5e6ad7ce9..cc9f03e24 100644
--- a/operator/config/manager/kustomization.yaml
+++ b/operator/config/manager/kustomization.yaml
@@ -1,9 +1,9 @@
 resources:
-  - namespace.yaml
-  - deployment.yaml
+- namespace.yaml
+- deployment.yaml
 apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 images:
-  - name: controller
-    newName: lmcache/operator
-    newTag: latest
+- name: controller
+  newName: 1nfinity/production-stack-controller
+  newTag: latest
diff --git a/operator/config/rbac/kustomization.yaml b/operator/config/rbac/kustomization.yaml
index 2041c9e0f..54248b6f0 100644
--- a/operator/config/rbac/kustomization.yaml
+++ b/operator/config/rbac/kustomization.yaml
@@ -35,6 +35,4 @@ resources:
   - vllmruntime_editor_role.yaml
   - vllmruntime_viewer_role.yaml
   # Pod viewer role is used to view pods for vllmrouter's service discovery
-  - pod_viewer_role.yaml
-  - vllmrouter_service_account.yaml
   - vllmrouter_role_binding.yaml
diff --git a/operator/default.yaml b/operator/default.yaml
new file mode 100644
index 000000000..848c64670
--- /dev/null
+++ b/operator/default.yaml
@@ -0,0 +1,1471 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  labels:
+    app.kubernetes.io/managed-by: kustomize
+    app.kubernetes.io/name: production-stack
+    control-plane: controller-manager
+  name: production-stack-system
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.17.2
+  name: cacheservers.production-stack.vllm.ai
+spec:
+  group: production-stack.vllm.ai
+  names:
+    kind: CacheServer
+    listKind: CacheServerList
+    plural: cacheservers
+    singular: cacheserver
+  scope: Namespaced
+  versions:
+  - additionalPrinterColumns:
+    - jsonPath: .status.status
+      name: Status
+      type: string
+    - jsonPath: .metadata.creationTimestamp
+      name: Age
+      type: date
+    name: v1alpha1
+    schema:
+      openAPIV3Schema:
+        description: CacheServer is the Schema for the cacheservers API
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: CacheServerSpec defines the desired state of CacheServer
+            properties:
+              deploymentStrategy:
+                default: RollingUpdate
+                description: Deployment strategy
+                enum:
+                - RollingUpdate
+                - Recreate
+                type: string
+              image:
+                description: Image configuration for the cache server
+                properties:
+                  name:
+                    type: string
+                  pullPolicy:
+                    type: string
+                  pullSecretName:
+                    type: string
+                  registry:
+                    type: string
+                required:
+                - name
+                - registry
+                type: object
+              port:
+                default: 8000
+                description: Container port for the cache server
+                format: int32
+                type: integer
+              replicas:
+                default: 1
+                description: Number of replicas
+                format: int32
+                type: integer
+              resources:
+                description: Resource requirements
+                properties:
+                  cpu:
+                    type: string
+                  gpu:
+                    type: string
+                  memory:
+                    type: string
+                type: object
+            required:
+            - deploymentStrategy
+            - image
+            - port
+            - replicas
+            - resources
+            type: object
+          status:
+            description: CacheServerStatus defines the observed state of CacheServer
+            properties:
+              lastUpdated:
+                description: Last time the status was updated
+                format: date-time
+                type: string
+              status:
+                description: Current status of the cache server
+                type: string
+            type: object
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.17.2
+  name: loraadapters.production-stack.vllm.ai
+spec:
+  group: production-stack.vllm.ai
+  names:
+    kind: LoraAdapter
+    listKind: LoraAdapterList
+    plural: loraadapters
+    singular: loraadapter
+  scope: Namespaced
+  versions:
+  - additionalPrinterColumns:
+    - jsonPath: .status.phase
+      name: Phase
+      type: string
+    - jsonPath: .metadata.creationTimestamp
+      name: Age
+      type: date
+    name: v1alpha1
+    schema:
+      openAPIV3Schema:
+        description: LoraAdapter is the Schema for the loraadapters API.
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: LoraAdapterSpec defines the desired state of LoraAdapter.
+            properties:
+              adapterSource:
+                description: AdapterSource defines where to get the LoRA adapter from.
+                properties:
+                  adapterName:
+                    description: AdapterName is the name of the adapter to apply.
+                    type: string
+                  adapterPath:
+                    description: 'AdapterPath is the path to the LoRA adapter weights.
+                      For local sources: required, specifies the path to the adapter
+                      For remote sources: optional, will be updated by the controller
+                      with the download path'
+                    type: string
+                  credentialsSecretRef:
+                    description: CredentialsSecretRef references a secret containing
+                      storage credentials.
+                    properties:
+                      name:
+                        description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+                          TODO: Add other useful fields. apiVersion, kind, uid?'
+                        type: string
+                    type: object
+                    x-kubernetes-map-type: atomic
+                  maxAdapters:
+                    description: MaxAdapters is the maximum number of adapters to
+                      load.
+                    format: int32
+                    type: integer
+                  pattern:
+                    description: Pattern is the pattern to use for the adapter name.
+                    type: string
+                  repository:
+                    description: Repository is the repository to get the LoRA adapter
+                      from.
+                    type: string
+                  type:
+                    description: Type is the type of the adapter source.
+                    enum:
+                    - local
+                    - s3
+                    - http
+                    - huggingface
+                    type: string
+                required:
+                - adapterName
+                - type
+                type: object
+              baseModel:
+                description: BaseModel is the name of the base model this adapter
+                  is for.
+                type: string
+              loraAdapterDeploymentConfig:
+                description: DeploymentConfig defines how the adapter should be deployed
+                properties:
+                  algorithm:
+                    default: default
+                    description: Algorithm specifies which placement algorithm to
+                      use.
+                    enum:
+                    - default
+                    - ordered
+                    - equalized
+                    type: string
+                  replicas:
+                    description: Replicas is the number of replicas that should load
+                      this adapter.
+                    format: int32
+                    minimum: 0
+                    type: integer
+                required:
+                - algorithm
+                type: object
+              vllmApiKey:
+                description: VLLMApiKey defines the configuration for vLLM API key
+                  authentication
+                properties:
+                  secretRef:
+                    description: Reference to a secret containing the API key
+                    properties:
+                      secretKey:
+                        description: Key in the secret containing the API key
+                        type: string
+                      secretName:
+                        description: Name of the secret
+                        type: string
+                    required:
+                    - secretKey
+                    - secretName
+                    type: object
+                  value:
+                    description: Direct API key value
+                    type: string
+                type: object
+            required:
+            - adapterSource
+            - baseModel
+            type: object
+          status:
+            description: LoraAdapterStatus defines the observed state of LoraAdapter.
+            properties:
+              conditions:
+                description: Condition contains details for one aspect of the current
+                  state of this API Resource.
+                items:
+                  description: Condition contains details for one aspect of the current
+                    state of this API Resource.
+                  properties:
+                    lastTransitionTime:
+                      description: LastTransitionTime is the last time the condition
+                        transitioned from one status to another.
+                      format: date-time
+                      type: string
+                    message:
+                      description: Message is a human-readable message indicating
+                        details about why the current state is set.
+                      maxLength: 32768
+                      type: string
+                    reason:
+                      description: Reason is a brief reason for the condition's current
+                        status.
+                      maxLength: 1024
+                      minLength: 1
+                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+                      type: string
+                    status:
+                      description: Status is the status of the condition.
+                      enum:
+                      - "True"
+                      - "False"
+                      - Unknown
+                      type: string
+                    type:
+                      description: type of condition in CamelCase.
+                      maxLength: 316
+                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+                      type: string
+                  required:
+                  - lastTransitionTime
+                  - message
+                  - reason
+                  - status
+                  - type
+                  type: object
+                type: array
+              loadedAdapters:
+                description: LoadedAdapters tracks the loading status of adapters
+                  and their pod assignments.
+                items:
+                  description: LoadedAdapter represents an adapter that has been loaded
+                    into a pod
+                  properties:
+                    loadTime:
+                      description: LoadTime is when the adapter was loaded
+                      format: date-time
+                      type: string
+                    name:
+                      description: Name is the name of the adapter
+                      type: string
+                    path:
+                      description: Path is the path where the adapter is loaded
+                      type: string
+                    podAssignments:
+                      description: PodAssignments represents the pods this adapter
+                        has been assigned to
+                      properties:
+                        namespace:
+                          description: Namespace is the namespace of the pod
+                          type: string
+                        podName:
+                          description: PodName is the name of the pod
+                          type: string
+                      required:
+                      - namespace
+                      - podName
+                      type: object
+                    status:
+                      description: Status is the status of the adapter
+                      type: string
+                  required:
+                  - name
+                  - path
+                  - podAssignments
+                  - status
+                  type: object
+                type: array
+              message:
+                description: Message provides additional information about the current
+                  phase.
+                type: string
+              observedGeneration:
+                description: ObservedGeneration represents the .metadata.generation
+                  that the condition was set based upon.
+                format: int64
+                minimum: 0
+                type: integer
+              phase:
+                description: Phase represents the current phase of the adapter deployment.
+                type: string
+            type: object
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.17.2
+  name: vllmrouters.production-stack.vllm.ai
+spec:
+  group: production-stack.vllm.ai
+  names:
+    kind: VLLMRouter
+    listKind: VLLMRouterList
+    plural: vllmrouters
+    singular: vllmrouter
+  scope: Namespaced
+  versions:
+  - name: v1alpha1
+    schema:
+      openAPIV3Schema:
+        description: VLLMRouter is the Schema for the vllmrouters API
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: VLLMRouterSpec defines the desired state of VLLMRouter
+            properties:
+              enableRouter:
+                default: true
+                description: EnableRouter determines if the router should be deployed
+                type: boolean
+              engineScrapeInterval:
+                description: EngineScrapeInterval for collecting engine statistics
+                format: int32
+                type: integer
+              env:
+                description: Environment variables
+                items:
+                  description: EnvVar represents an environment variable
+                  properties:
+                    name:
+                      type: string
+                    value:
+                      type: string
+                  required:
+                  - name
+                  - value
+                  type: object
+                type: array
+              extraArgs:
+                description: ExtraArgs for additional router arguments
+                items:
+                  type: string
+                type: array
+              image:
+                description: Image configuration
+                properties:
+                  name:
+                    type: string
+                  pullPolicy:
+                    type: string
+                  pullSecretName:
+                    type: string
+                  registry:
+                    type: string
+                required:
+                - name
+                - registry
+                type: object
+              k8sLabelSelector:
+                description: K8sLabelSelector specifies the label selector for vLLM
+                  runtime pods when using k8s service discovery
+                type: string
+              nodeSelectorTerms:
+                description: NodeSelectorTerms for pod scheduling
+                items:
+                  description: |-
+                    A null or empty node selector term matches no objects. The requirements of
+                    them are ANDed.
+                    The TopologySelectorTerm type implements a subset of the NodeSelectorTerm.
+                  properties:
+                    matchExpressions:
+                      description: A list of node selector requirements by node's
+                        labels.
+                      items:
+                        description: |-
+                          A node selector requirement is a selector that contains values, a key, and an operator
+                          that relates the key and values.
+                        properties:
+                          key:
+                            description: The label key that the selector applies to.
+                            type: string
+                          operator:
+                            description: |-
+                              Represents a key's relationship to a set of values.
+                              Valid operators are In, NotIn, Exists, DoesNotExist, Gt, and Lt.
+                            type: string
+                          values:
+                            description: |-
+                              An array of string values. If the operator is In or NotIn,
+                              the values array must be non-empty. If the operator is Exists or DoesNotExist,
+                              the values array must be empty. If the operator is Gt or Lt, the values
+                              array must have a single element, which will be interpreted as an integer.
+                              This array is replaced during a strategic merge patch.
+                            items:
+                              type: string
+                            type: array
+                            x-kubernetes-list-type: atomic
+                        required:
+                        - key
+                        - operator
+                        type: object
+                      type: array
+                      x-kubernetes-list-type: atomic
+                    matchFields:
+                      description: A list of node selector requirements by node's
+                        fields.
+                      items:
+                        description: |-
+                          A node selector requirement is a selector that contains values, a key, and an operator
+                          that relates the key and values.
+                        properties:
+                          key:
+                            description: The label key that the selector applies to.
+                            type: string
+                          operator:
+                            description: |-
+                              Represents a key's relationship to a set of values.
+                              Valid operators are In, NotIn, Exists, DoesNotExist, Gt, and Lt.
+                            type: string
+                          values:
+                            description: |-
+                              An array of string values. If the operator is In or NotIn,
+                              the values array must be non-empty. If the operator is Exists or DoesNotExist,
+                              the values array must be empty. If the operator is Gt or Lt, the values
+                              array must have a single element, which will be interpreted as an integer.
+                              This array is replaced during a strategic merge patch.
+                            items:
+                              type: string
+                            type: array
+                            x-kubernetes-list-type: atomic
+                        required:
+                        - key
+                        - operator
+                        type: object
+                      type: array
+                      x-kubernetes-list-type: atomic
+                  type: object
+                  x-kubernetes-map-type: atomic
+                type: array
+              port:
+                default: 80
+                description: ContainerPort for the router service
+                format: int32
+                type: integer
+              replicas:
+                default: 1
+                description: Replicas specifies the number of router replicas
+                format: int32
+                type: integer
+              requestStatsWindow:
+                description: RequestStatsWindow for request statistics
+                format: int32
+                type: integer
+              resources:
+                description: Resource requirements
+                properties:
+                  cpu:
+                    type: string
+                  gpu:
+                    type: string
+                  memory:
+                    type: string
+                type: object
+              routingLogic:
+                default: roundrobin
+                description: RoutingLogic specifies the routing strategy
+                enum:
+                - roundrobin
+                - session
+                type: string
+              serviceAccountName:
+                description: ServiceAccountName for the router pod
+                type: string
+              serviceDiscovery:
+                default: k8s
+                description: ServiceDiscovery specifies the service discovery method
+                  (k8s or static)
+                enum:
+                - k8s
+                - static
+                type: string
+              sessionKey:
+                default: ""
+                description: SessionKey for session-based routing
+                type: string
+              staticBackends:
+                description: StaticBackends is required when using static service
+                  discovery
+                type: string
+              staticModels:
+                description: StaticModels is required when using static service discovery
+                type: string
+              vllmApiKeyName:
+                type: string
+              vllmApiKeySecret:
+                description: VLLM API Key configuration
+                properties:
+                  name:
+                    default: ""
+                    description: |-
+                      Name of the referent.
+                      This field is effectively required, but due to backwards compatibility is
+                      allowed to be empty. Instances of this type with an empty value here are
+                      almost certainly wrong.
+                      More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+                    type: string
+                type: object
+                x-kubernetes-map-type: atomic
+            required:
+            - image
+            - resources
+            type: object
+          status:
+            description: VLLMRouterStatus defines the observed state of VLLMRouter
+            properties:
+              activeRuntimes:
+                description: Number of active runtimes
+                format: int32
+                type: integer
+              lastUpdated:
+                description: Last updated timestamp
+                format: date-time
+                type: string
+              status:
+                description: Router status
+                type: string
+            type: object
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.17.2
+  name: vllmruntimes.production-stack.vllm.ai
+spec:
+  group: production-stack.vllm.ai
+  names:
+    kind: VLLMRuntime
+    listKind: VLLMRuntimeList
+    plural: vllmruntimes
+    shortNames:
+    - vr
+    singular: vllmruntime
+  scope: Namespaced
+  versions:
+  - name: v1alpha1
+    schema:
+      openAPIV3Schema:
+        description: VLLMRuntime is the Schema for the vllmruntimes API
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: VLLMRuntimeSpec defines the desired state of VLLMRuntime
+            properties:
+              deploymentConfig:
+                description: Deployment configuration
+                properties:
+                  deploymentStrategy:
+                    default: RollingUpdate
+                    description: Deploy strategy
+                    enum:
+                    - RollingUpdate
+                    - Recreate
+                    type: string
+                  image:
+                    description: Image configuration
+                    properties:
+                      name:
+                        type: string
+                      pullPolicy:
+                        type: string
+                      pullSecretName:
+                        type: string
+                      registry:
+                        type: string
+                    required:
+                    - name
+                    - registry
+                    type: object
+                  replicas:
+                    default: 1
+                    description: Replicas
+                    format: int32
+                    type: integer
+                  resources:
+                    description: Resource requirements
+                    properties:
+                      cpu:
+                        type: string
+                      gpu:
+                        type: string
+                      memory:
+                        type: string
+                    type: object
+                required:
+                - image
+                - resources
+                type: object
+              lmCacheConfig:
+                description: LM Cache configuration
+                properties:
+                  cpuOffloadingBufferSize:
+                    default: 4Gi
+                    description: CPUOffloadingBufferSize is the size of the CPU offloading
+                      buffer
+                    type: string
+                  diskOffloadingBufferSize:
+                    default: 8Gi
+                    description: DiskOffloadingBufferSize is the size of the disk
+                      offloading buffer
+                    type: string
+                  enabled:
+                    default: false
+                    description: Enabled enables LM Cache
+                    type: boolean
+                  remoteSerde:
+                    description: RemoteSerde is the serialization format for the remote
+                      cache
+                    type: string
+                  remoteUrl:
+                    description: RemoteURL is the URL of the remote cache server
+                    type: string
+                type: object
+              model:
+                description: Model configuration
+                properties:
+                  dtype:
+                    description: Data type
+                    type: string
+                  enableLoRA:
+                    description: Enable LoRA
+                    type: boolean
+                  enableTool:
+                    description: Enable tool
+                    type: boolean
+                  hfTokenName:
+                    default: token
+                    type: string
+                  hfTokenSecret:
+                    description: HuggingFace token secret
+                    properties:
+                      name:
+                        default: ""
+                        description: |-
+                          Name of the referent.
+                          This field is effectively required, but due to backwards compatibility is
+                          allowed to be empty. Instances of this type with an empty value here are
+                          almost certainly wrong.
+                          More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+                        type: string
+                    type: object
+                    x-kubernetes-map-type: atomic
+                  maxModelLen:
+                    description: Maximum model length
+                    format: int32
+                    type: integer
+                  maxNumSeqs:
+                    description: Maximum number of sequences
+                    format: int32
+                    type: integer
+                  modelURL:
+                    description: Model URL
+                    type: string
+                  toolCallParser:
+                    description: Tool call parser
+                    type: string
+                required:
+                - modelURL
+                type: object
+              vllmConfig:
+                description: vLLM server configuration
+                properties:
+                  enableChunkedPrefill:
+                    description: Enable chunked prefill
+                    type: boolean
+                  enablePrefixCaching:
+                    description: Enable prefix caching
+                    type: boolean
+                  env:
+                    description: Environment variables
+                    items:
+                      description: EnvVar represents an environment variable
+                      properties:
+                        name:
+                          type: string
+                        value:
+                          type: string
+                      required:
+                      - name
+                      - value
+                      type: object
+                    type: array
+                  extraArgs:
+                    description: Extra arguments for vllm serve
+                    items:
+                      type: string
+                    type: array
+                  gpuMemoryUtilization:
+                    description: GPU memory utilization
+                    type: string
+                  maxLoras:
+                    description: Maximum number of LoRAs
+                    format: int32
+                    type: integer
+                  port:
+                    default: 8000
+                    description: Port for vLLM server
+                    format: int32
+                    type: integer
+                  tensorParallelSize:
+                    description: Tensor parallel size
+                    format: int32
+                    type: integer
+                  v1:
+                    description: Use V1 API
+                    type: boolean
+                type: object
+            required:
+            - deploymentConfig
+            - model
+            - vllmConfig
+            type: object
+          status:
+            description: VLLMRuntimeStatus defines the observed state of VLLMRuntime
+            properties:
+              lastUpdated:
+                description: Last updated timestamp
+                format: date-time
+                type: string
+              modelStatus:
+                description: Model status
+                type: string
+            type: object
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  labels:
+    app.kubernetes.io/managed-by: kustomize
+    app.kubernetes.io/name: production-stack
+  name: production-stack-controller-manager
+  namespace: production-stack-system
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  labels:
+    app.kubernetes.io/managed-by: kustomize
+    app.kubernetes.io/name: production-stack
+  name: production-stack-vllmrouter-sa
+  namespace: production-stack-system
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  labels:
+    app.kubernetes.io/managed-by: kustomize
+    app.kubernetes.io/name: production-stack
+  name: production-stack-leader-election-role
+  namespace: production-stack-system
+rules:
+- apiGroups:
+  - ""
+  resources:
+  - configmaps
+  verbs:
+  - get
+  - list
+  - watch
+  - create
+  - update
+  - patch
+  - delete
+- apiGroups:
+  - coordination.k8s.io
+  resources:
+  - leases
+  verbs:
+  - get
+  - list
+  - watch
+  - create
+  - update
+  - patch
+  - delete
+- apiGroups:
+  - ""
+  resources:
+  - events
+  verbs:
+  - create
+  - patch
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  labels:
+    app.kubernetes.io/managed-by: kustomize
+    app.kubernetes.io/name: production-stack
+  name: production-stack-pod-viewer-role
+  namespace: production-stack-system
+rules:
+- apiGroups:
+  - ""
+  resources:
+  - pods
+  verbs:
+  - get
+  - list
+  - watch
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/managed-by: kustomize
+    app.kubernetes.io/name: production-stack
+  name: production-stack-cacheserver-admin-role
+rules:
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - cacheservers
+  verbs:
+  - '*'
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - cacheservers/status
+  verbs:
+  - get
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/managed-by: kustomize
+    app.kubernetes.io/name: production-stack
+  name: production-stack-cacheserver-editor-role
+rules:
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - cacheservers
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - cacheservers/status
+  verbs:
+  - get
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/managed-by: kustomize
+    app.kubernetes.io/name: production-stack
+  name: production-stack-cacheserver-viewer-role
+rules:
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - cacheservers
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - cacheservers/status
+  verbs:
+  - get
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/managed-by: kustomize
+    app.kubernetes.io/name: production-stack
+  name: production-stack-loraadapter-admin-role
+rules:
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - loraadapters
+  verbs:
+  - '*'
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - loraadapters/status
+  verbs:
+  - get
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/managed-by: kustomize
+    app.kubernetes.io/name: production-stack
+  name: production-stack-loraadapter-editor-role
+rules:
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - loraadapters
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - loraadapters/status
+  verbs:
+  - get
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/managed-by: kustomize
+    app.kubernetes.io/name: production-stack
+  name: production-stack-loraadapter-viewer-role
+rules:
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - loraadapters
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - loraadapters/status
+  verbs:
+  - get
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: production-stack-manager-role
+rules:
+- apiGroups:
+  - ""
+  resources:
+  - configmaps
+  - secrets
+  - services
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - ""
+  resources:
+  - pods
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - apps
+  resources:
+  - deployments
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - cacheservers
+  - loraadapters
+  - vllmrouters
+  - vllmruntimes
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - cacheservers/finalizers
+  - loraadapters/finalizers
+  - vllmrouters/finalizers
+  - vllmruntimes/finalizers
+  verbs:
+  - update
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - cacheservers/status
+  - loraadapters/status
+  - vllmrouters/status
+  - vllmruntimes/status
+  verbs:
+  - get
+  - patch
+  - update
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: production-stack-metrics-auth-role
+rules:
+- apiGroups:
+  - authentication.k8s.io
+  resources:
+  - tokenreviews
+  verbs:
+  - create
+- apiGroups:
+  - authorization.k8s.io
+  resources:
+  - subjectaccessreviews
+  verbs:
+  - create
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: production-stack-metrics-reader
+rules:
+- nonResourceURLs:
+  - /metrics
+  verbs:
+  - get
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/managed-by: kustomize
+    app.kubernetes.io/name: production-stack
+  name: production-stack-vllmrouter-admin-role
+rules:
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - vllmrouters
+  verbs:
+  - '*'
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - vllmrouters/status
+  verbs:
+  - get
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/managed-by: kustomize
+    app.kubernetes.io/name: production-stack
+  name: production-stack-vllmrouter-editor-role
+rules:
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - vllmrouters
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - vllmrouters/status
+  verbs:
+  - get
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/managed-by: kustomize
+    app.kubernetes.io/name: production-stack
+  name: production-stack-vllmrouter-viewer-role
+rules:
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - vllmrouters
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - vllmrouters/status
+  verbs:
+  - get
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/managed-by: kustomize
+    app.kubernetes.io/name: production-stack
+  name: production-stack-vllmruntime-admin-role
+rules:
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - vllmruntimes
+  verbs:
+  - '*'
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - vllmruntimes/status
+  verbs:
+  - get
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/managed-by: kustomize
+    app.kubernetes.io/name: production-stack
+  name: production-stack-vllmruntime-editor-role
+rules:
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - vllmruntimes
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - vllmruntimes/status
+  verbs:
+  - get
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/managed-by: kustomize
+    app.kubernetes.io/name: production-stack
+  name: production-stack-vllmruntime-viewer-role
+rules:
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - vllmruntimes
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - vllmruntimes/status
+  verbs:
+  - get
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  labels:
+    app.kubernetes.io/managed-by: kustomize
+    app.kubernetes.io/name: production-stack
+  name: production-stack-leader-election-rolebinding
+  namespace: production-stack-system
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: production-stack-leader-election-role
+subjects:
+- kind: ServiceAccount
+  name: production-stack-controller-manager
+  namespace: production-stack-system
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  labels:
+    app.kubernetes.io/managed-by: kustomize
+    app.kubernetes.io/name: production-stack
+  name: production-stack-pod-viewer-binding
+  namespace: production-stack-system
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: production-stack-pod-viewer-role
+subjects:
+- kind: ServiceAccount
+  name: production-stack-vllmrouter-sa
+  namespace: production-stack-system
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  labels:
+    app.kubernetes.io/managed-by: kustomize
+    app.kubernetes.io/name: production-stack
+  name: production-stack-manager-rolebinding
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: production-stack-manager-role
+subjects:
+- kind: ServiceAccount
+  name: production-stack-controller-manager
+  namespace: production-stack-system
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: production-stack-metrics-auth-rolebinding
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: production-stack-metrics-auth-role
+subjects:
+- kind: ServiceAccount
+  name: production-stack-controller-manager
+  namespace: production-stack-system
+---
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    app.kubernetes.io/managed-by: kustomize
+    app.kubernetes.io/name: production-stack
+    control-plane: controller-manager
+  name: production-stack-controller-manager-metrics-service
+  namespace: production-stack-system
+spec:
+  ports:
+  - name: https
+    port: 8443
+    protocol: TCP
+    targetPort: 8443
+  selector:  # must match the manager Deployment's pod template labels
+    app.kubernetes.io/component: manager
+    app.kubernetes.io/name: production-stack
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  labels:
+    app.kubernetes.io/component: manager
+    app.kubernetes.io/created-by: production-stack
+    app.kubernetes.io/instance: production-stack
+    app.kubernetes.io/managed-by: kustomize
+    app.kubernetes.io/name: production-stack
+    app.kubernetes.io/part-of: production-stack
+  name: production-stack-production-stack-controller-manager
+  namespace: production-stack-system
+spec:
+  selector:
+    matchLabels:
+      app.kubernetes.io/component: manager
+      app.kubernetes.io/instance: production-stack
+      app.kubernetes.io/name: production-stack
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/component: manager
+        app.kubernetes.io/instance: production-stack
+        app.kubernetes.io/name: production-stack
+    spec:
+      containers:
+      - args:
+        - --metrics-bind-address=:8443
+        - --leader-elect
+        - --health-probe-bind-address=:8081
+        command:
+        - /manager
+        image: 1nfinity/production-stack-controller:latest
+        imagePullPolicy: Always
+        livenessProbe:
+          httpGet:
+            path: /healthz
+            port: 8081
+          initialDelaySeconds: 15
+          periodSeconds: 20
+        name: manager
+        ports: []
+        readinessProbe:
+          httpGet:
+            path: /readyz
+            port: 8081
+          initialDelaySeconds: 5
+          periodSeconds: 10
+        resources:
+          limits:
+            cpu: 500m
+            memory: 128Mi
+          requests:
+            cpu: 10m
+            memory: 64Mi
+        securityContext:
+          allowPrivilegeEscalation: false
+          capabilities:
+            drop:
+            - ALL
+        volumeMounts: []
+      securityContext:
+        runAsNonRoot: true
+        seccompProfile:
+          type: RuntimeDefault
+      serviceAccountName: production-stack-controller-manager
+      terminationGracePeriodSeconds: 10
+      volumes: []

From f5c9b28702b4eeb13f99f579ae8751a0081076c0 Mon Sep 17 00:00:00 2001
From: royyhuang <roy.y.huang@gmail.com>
Date: Fri, 6 Jun 2025 06:00:28 +0000
Subject: [PATCH 10/14] move rbac for vllm-router pod to controller

Signed-off-by: royyhuang <roy.y.huang@gmail.com>
---
 operator/{ => config}/default.yaml            |  58 +--
 operator/config/default/kustomization.yaml    | 414 +++++++++---------
 operator/config/rbac/kustomization.yaml       |   2 -
 operator/config/rbac/role.yaml                | 152 ++++---
 operator/config/rbac/role_binding.yaml        |   6 +-
 .../config/rbac/vllmrouter_role_binding.yaml  |  43 --
 .../controller/vllmrouter_controller.go       | 125 ++++++
 7 files changed, 432 insertions(+), 368 deletions(-)
 rename operator/{ => config}/default.yaml (97%)
 delete mode 100644 operator/config/rbac/vllmrouter_role_binding.yaml

diff --git a/operator/default.yaml b/operator/config/default.yaml
similarity index 97%
rename from operator/default.yaml
rename to operator/config/default.yaml
index 848c64670..85242a851 100644
--- a/operator/default.yaml
+++ b/operator/config/default.yaml
@@ -864,15 +864,6 @@ metadata:
   name: production-stack-controller-manager
   namespace: production-stack-system
 ---
-apiVersion: v1
-kind: ServiceAccount
-metadata:
-  labels:
-    app.kubernetes.io/managed-by: kustomize
-    app.kubernetes.io/name: production-stack
-  name: production-stack-vllmrouter-sa
-  namespace: production-stack-system
----
 apiVersion: rbac.authorization.k8s.io/v1
 kind: Role
 metadata:
@@ -915,24 +906,6 @@ rules:
   - patch
 ---
 apiVersion: rbac.authorization.k8s.io/v1
-kind: Role
-metadata:
-  labels:
-    app.kubernetes.io/managed-by: kustomize
-    app.kubernetes.io/name: production-stack
-  name: production-stack-pod-viewer-role
-  namespace: production-stack-system
-rules:
-- apiGroups:
-  - ""
-  resources:
-  - pods
-  verbs:
-  - get
-  - list
-  - watch
----
-apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
 metadata:
   labels:
@@ -1084,6 +1057,7 @@ rules:
   resources:
   - configmaps
   - secrets
+  - serviceaccounts
   - services
   verbs:
   - create
@@ -1148,6 +1122,19 @@ rules:
   - get
   - patch
   - update
+- apiGroups:
+  - rbac.authorization.k8s.io
+  resources:
+  - rolebindings
+  - roles
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
@@ -1337,23 +1324,6 @@ subjects:
   namespace: production-stack-system
 ---
 apiVersion: rbac.authorization.k8s.io/v1
-kind: RoleBinding
-metadata:
-  labels:
-    app.kubernetes.io/managed-by: kustomize
-    app.kubernetes.io/name: production-stack
-  name: production-stack-pod-viewer-binding
-  namespace: production-stack-system
-roleRef:
-  apiGroup: rbac.authorization.k8s.io
-  kind: Role
-  name: production-stack-pod-viewer-role
-subjects:
-- kind: ServiceAccount
-  name: production-stack-vllmrouter-sa
-  namespace: production-stack-system
----
-apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRoleBinding
 metadata:
   labels:
diff --git a/operator/config/default/kustomization.yaml b/operator/config/default/kustomization.yaml
index db3e47347..a4089b35f 100644
--- a/operator/config/default/kustomization.yaml
+++ b/operator/config/default/kustomization.yaml
@@ -15,220 +15,220 @@ namePrefix: production-stack-
 #    someName: someValue
 
 resources:
-- ../crd
-- ../rbac
-- ../manager
-# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in
-# crd/kustomization.yaml
-#- ../webhook
-# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required.
-#- ../certmanager
-# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'.
-#- ../prometheus
-# [METRICS] Expose the controller manager metrics service.
-- metrics_service.yaml
+  - ../crd
+  - ../rbac
+  - ../manager
+  # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in
+  # crd/kustomization.yaml
+  #- ../webhook
+  # [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required.
+  #- ../certmanager
+  # [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'.
+  #- ../prometheus
+  # [METRICS] Expose the controller manager metrics service.
+  - metrics_service.yaml
 # [NETWORK POLICY] Protect the /metrics endpoint and Webhook Server with NetworkPolicy.
 # Only Pod(s) running a namespace labeled with 'metrics: enabled' will be able to gather the metrics.
 # Only CR(s) which requires webhooks and are applied on namespaces labeled with 'webhooks: enabled' will
 # be able to communicate with the Webhook Server.
 #- ../network-policy
 
-# Uncomment the patches line if you enable Metrics
+# [METRICS] Patches applied to the manager Deployment; the commented-out entries below are optional (cert-manager, webhook).
 patches:
-# [METRICS] The following patch will enable the metrics endpoint using HTTPS and the port :8443.
-# More info: https://book.kubebuilder.io/reference/metrics
-- path: manager_metrics_patch.yaml
-  target:
-    kind: Deployment
+  # [METRICS] The following patch will enable the metrics endpoint using HTTPS and the port :8443.
+  # More info: https://book.kubebuilder.io/reference/metrics
+  - path: manager_metrics_patch.yaml
+    target:
+      kind: Deployment
 
-# Uncomment the patches line if you enable Metrics and CertManager
-# [METRICS-WITH-CERTS] To enable metrics protected with certManager, uncomment the following line.
-# This patch will protect the metrics with certManager self-signed certs.
-#- path: cert_metrics_manager_patch.yaml
-#  target:
-#    kind: Deployment
+  # Uncomment the patches line if you enable Metrics and CertManager
+  # [METRICS-WITH-CERTS] To enable metrics protected with certManager, uncomment the following line.
+  # This patch will protect the metrics with certManager self-signed certs.
+  #- path: cert_metrics_manager_patch.yaml
+  #  target:
+  #    kind: Deployment
 
-# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in
-# crd/kustomization.yaml
-#- path: manager_webhook_patch.yaml
-#  target:
-#    kind: Deployment
+  # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in
+  # crd/kustomization.yaml
+  #- path: manager_webhook_patch.yaml
+  #  target:
+  #    kind: Deployment
 
-# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix.
-# Uncomment the following replacements to add the cert-manager CA injection annotations
-#replacements:
-# - source: # Uncomment the following block to enable certificates for metrics
-#     kind: Service
-#     version: v1
-#     name: controller-manager-metrics-service
-#     fieldPath: metadata.name
-#   targets:
-#     - select:
-#         kind: Certificate
-#         group: cert-manager.io
-#         version: v1
-#         name: metrics-certs
-#       fieldPaths:
-#         - spec.dnsNames.0
-#         - spec.dnsNames.1
-#       options:
-#         delimiter: '.'
-#         index: 0
-#         create: true
-#     - select: # Uncomment the following to set the Service name for TLS config in Prometheus ServiceMonitor
-#         kind: ServiceMonitor
-#         group: monitoring.coreos.com
-#         version: v1
-#         name: controller-manager-metrics-monitor
-#       fieldPaths:
-#         - spec.endpoints.0.tlsConfig.serverName
-#       options:
-#         delimiter: '.'
-#         index: 0
-#         create: true
-#
-# - source:
-#     kind: Service
-#     version: v1
-#     name: controller-manager-metrics-service
-#     fieldPath: metadata.namespace
-#   targets:
-#     - select:
-#         kind: Certificate
-#         group: cert-manager.io
-#         version: v1
-#         name: metrics-certs
-#       fieldPaths:
-#         - spec.dnsNames.0
-#         - spec.dnsNames.1
-#       options:
-#         delimiter: '.'
-#         index: 1
-#         create: true
-#     - select: # Uncomment the following to set the Service namespace for TLS in Prometheus ServiceMonitor
-#         kind: ServiceMonitor
-#         group: monitoring.coreos.com
-#         version: v1
-#         name: controller-manager-metrics-monitor
-#       fieldPaths:
-#         - spec.endpoints.0.tlsConfig.serverName
-#       options:
-#         delimiter: '.'
-#         index: 1
-#         create: true
-#
-# - source: # Uncomment the following block if you have any webhook
-#     kind: Service
-#     version: v1
-#     name: webhook-service
-#     fieldPath: .metadata.name # Name of the service
-#   targets:
-#     - select:
-#         kind: Certificate
-#         group: cert-manager.io
-#         version: v1
-#         name: serving-cert
-#       fieldPaths:
-#         - .spec.dnsNames.0
-#         - .spec.dnsNames.1
-#       options:
-#         delimiter: '.'
-#         index: 0
-#         create: true
-# - source:
-#     kind: Service
-#     version: v1
-#     name: webhook-service
-#     fieldPath: .metadata.namespace # Namespace of the service
-#   targets:
-#     - select:
-#         kind: Certificate
-#         group: cert-manager.io
-#         version: v1
-#         name: serving-cert
-#       fieldPaths:
-#         - .spec.dnsNames.0
-#         - .spec.dnsNames.1
-#       options:
-#         delimiter: '.'
-#         index: 1
-#         create: true
-#
-# - source: # Uncomment the following block if you have a ValidatingWebhook (--programmatic-validation)
-#     kind: Certificate
-#     group: cert-manager.io
-#     version: v1
-#     name: serving-cert # This name should match the one in certificate.yaml
-#     fieldPath: .metadata.namespace # Namespace of the certificate CR
-#   targets:
-#     - select:
-#         kind: ValidatingWebhookConfiguration
-#       fieldPaths:
-#         - .metadata.annotations.[cert-manager.io/inject-ca-from]
-#       options:
-#         delimiter: '/'
-#         index: 0
-#         create: true
-# - source:
-#     kind: Certificate
-#     group: cert-manager.io
-#     version: v1
-#     name: serving-cert
-#     fieldPath: .metadata.name
-#   targets:
-#     - select:
-#         kind: ValidatingWebhookConfiguration
-#       fieldPaths:
-#         - .metadata.annotations.[cert-manager.io/inject-ca-from]
-#       options:
-#         delimiter: '/'
-#         index: 1
-#         create: true
-#
-# - source: # Uncomment the following block if you have a DefaultingWebhook (--defaulting )
-#     kind: Certificate
-#     group: cert-manager.io
-#     version: v1
-#     name: serving-cert
-#     fieldPath: .metadata.namespace # Namespace of the certificate CR
-#   targets:
-#     - select:
-#         kind: MutatingWebhookConfiguration
-#       fieldPaths:
-#         - .metadata.annotations.[cert-manager.io/inject-ca-from]
-#       options:
-#         delimiter: '/'
-#         index: 0
-#         create: true
-# - source:
-#     kind: Certificate
-#     group: cert-manager.io
-#     version: v1
-#     name: serving-cert
-#     fieldPath: .metadata.name
-#   targets:
-#     - select:
-#         kind: MutatingWebhookConfiguration
-#       fieldPaths:
-#         - .metadata.annotations.[cert-manager.io/inject-ca-from]
-#       options:
-#         delimiter: '/'
-#         index: 1
-#         create: true
-#
-# - source: # Uncomment the following block if you have a ConversionWebhook (--conversion)
-#     kind: Certificate
-#     group: cert-manager.io
-#     version: v1
-#     name: serving-cert
-#     fieldPath: .metadata.namespace # Namespace of the certificate CR
-#   targets: # Do not remove or uncomment the following scaffold marker; required to generate code for target CRD.
-# +kubebuilder:scaffold:crdkustomizecainjectionns
-# - source:
-#     kind: Certificate
-#     group: cert-manager.io
-#     version: v1
-#     name: serving-cert
-#     fieldPath: .metadata.name
-#   targets: # Do not remove or uncomment the following scaffold marker; required to generate code for target CRD.
-# +kubebuilder:scaffold:crdkustomizecainjectionname
+  # [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix.
+  # Uncomment the following replacements to add the cert-manager CA injection annotations ('replacements' is a top-level field: also remove the two-space indent).
+  #replacements:
+  # - source: # Uncomment the following block to enable certificates for metrics
+  #     kind: Service
+  #     version: v1
+  #     name: controller-manager-metrics-service
+  #     fieldPath: metadata.name
+  #   targets:
+  #     - select:
+  #         kind: Certificate
+  #         group: cert-manager.io
+  #         version: v1
+  #         name: metrics-certs
+  #       fieldPaths:
+  #         - spec.dnsNames.0
+  #         - spec.dnsNames.1
+  #       options:
+  #         delimiter: '.'
+  #         index: 0
+  #         create: true
+  #     - select: # Uncomment the following to set the Service name for TLS config in Prometheus ServiceMonitor
+  #         kind: ServiceMonitor
+  #         group: monitoring.coreos.com
+  #         version: v1
+  #         name: controller-manager-metrics-monitor
+  #       fieldPaths:
+  #         - spec.endpoints.0.tlsConfig.serverName
+  #       options:
+  #         delimiter: '.'
+  #         index: 0
+  #         create: true
+  #
+  # - source:
+  #     kind: Service
+  #     version: v1
+  #     name: controller-manager-metrics-service
+  #     fieldPath: metadata.namespace
+  #   targets:
+  #     - select:
+  #         kind: Certificate
+  #         group: cert-manager.io
+  #         version: v1
+  #         name: metrics-certs
+  #       fieldPaths:
+  #         - spec.dnsNames.0
+  #         - spec.dnsNames.1
+  #       options:
+  #         delimiter: '.'
+  #         index: 1
+  #         create: true
+  #     - select: # Uncomment the following to set the Service namespace for TLS in Prometheus ServiceMonitor
+  #         kind: ServiceMonitor
+  #         group: monitoring.coreos.com
+  #         version: v1
+  #         name: controller-manager-metrics-monitor
+  #       fieldPaths:
+  #         - spec.endpoints.0.tlsConfig.serverName
+  #       options:
+  #         delimiter: '.'
+  #         index: 1
+  #         create: true
+  #
+  # - source: # Uncomment the following block if you have any webhook
+  #     kind: Service
+  #     version: v1
+  #     name: webhook-service
+  #     fieldPath: .metadata.name # Name of the service
+  #   targets:
+  #     - select:
+  #         kind: Certificate
+  #         group: cert-manager.io
+  #         version: v1
+  #         name: serving-cert
+  #       fieldPaths:
+  #         - .spec.dnsNames.0
+  #         - .spec.dnsNames.1
+  #       options:
+  #         delimiter: '.'
+  #         index: 0
+  #         create: true
+  # - source:
+  #     kind: Service
+  #     version: v1
+  #     name: webhook-service
+  #     fieldPath: .metadata.namespace # Namespace of the service
+  #   targets:
+  #     - select:
+  #         kind: Certificate
+  #         group: cert-manager.io
+  #         version: v1
+  #         name: serving-cert
+  #       fieldPaths:
+  #         - .spec.dnsNames.0
+  #         - .spec.dnsNames.1
+  #       options:
+  #         delimiter: '.'
+  #         index: 1
+  #         create: true
+  #
+  # - source: # Uncomment the following block if you have a ValidatingWebhook (--programmatic-validation)
+  #     kind: Certificate
+  #     group: cert-manager.io
+  #     version: v1
+  #     name: serving-cert # This name should match the one in certificate.yaml
+  #     fieldPath: .metadata.namespace # Namespace of the certificate CR
+  #   targets:
+  #     - select:
+  #         kind: ValidatingWebhookConfiguration
+  #       fieldPaths:
+  #         - .metadata.annotations.[cert-manager.io/inject-ca-from]
+  #       options:
+  #         delimiter: '/'
+  #         index: 0
+  #         create: true
+  # - source:
+  #     kind: Certificate
+  #     group: cert-manager.io
+  #     version: v1
+  #     name: serving-cert
+  #     fieldPath: .metadata.name
+  #   targets:
+  #     - select:
+  #         kind: ValidatingWebhookConfiguration
+  #       fieldPaths:
+  #         - .metadata.annotations.[cert-manager.io/inject-ca-from]
+  #       options:
+  #         delimiter: '/'
+  #         index: 1
+  #         create: true
+  #
+  # - source: # Uncomment the following block if you have a DefaultingWebhook (--defaulting )
+  #     kind: Certificate
+  #     group: cert-manager.io
+  #     version: v1
+  #     name: serving-cert
+  #     fieldPath: .metadata.namespace # Namespace of the certificate CR
+  #   targets:
+  #     - select:
+  #         kind: MutatingWebhookConfiguration
+  #       fieldPaths:
+  #         - .metadata.annotations.[cert-manager.io/inject-ca-from]
+  #       options:
+  #         delimiter: '/'
+  #         index: 0
+  #         create: true
+  # - source:
+  #     kind: Certificate
+  #     group: cert-manager.io
+  #     version: v1
+  #     name: serving-cert
+  #     fieldPath: .metadata.name
+  #   targets:
+  #     - select:
+  #         kind: MutatingWebhookConfiguration
+  #       fieldPaths:
+  #         - .metadata.annotations.[cert-manager.io/inject-ca-from]
+  #       options:
+  #         delimiter: '/'
+  #         index: 1
+  #         create: true
+  #
+  # - source: # Uncomment the following block if you have a ConversionWebhook (--conversion)
+  #     kind: Certificate
+  #     group: cert-manager.io
+  #     version: v1
+  #     name: serving-cert
+  #     fieldPath: .metadata.namespace # Namespace of the certificate CR
+  #   targets: # Do not remove or uncomment the following scaffold marker; required to generate code for target CRD.
+  # +kubebuilder:scaffold:crdkustomizecainjectionns
+  # - source:
+  #     kind: Certificate
+  #     group: cert-manager.io
+  #     version: v1
+  #     name: serving-cert
+  #     fieldPath: .metadata.name
+  #   targets: # Do not remove or uncomment the following scaffold marker; required to generate code for target CRD.
+  # +kubebuilder:scaffold:crdkustomizecainjectionname
diff --git a/operator/config/rbac/kustomization.yaml b/operator/config/rbac/kustomization.yaml
index 54248b6f0..9f20ab9e2 100644
--- a/operator/config/rbac/kustomization.yaml
+++ b/operator/config/rbac/kustomization.yaml
@@ -34,5 +34,3 @@ resources:
   - vllmruntime_admin_role.yaml
   - vllmruntime_editor_role.yaml
   - vllmruntime_viewer_role.yaml
-  # Pod viewer role is used to view pods for vllmrouter's service discovery
-  - vllmrouter_role_binding.yaml
diff --git a/operator/config/rbac/role.yaml b/operator/config/rbac/role.yaml
index 9dedd14d2..8803b3f5c 100644
--- a/operator/config/rbac/role.yaml
+++ b/operator/config/rbac/role.yaml
@@ -4,72 +4,86 @@ kind: ClusterRole
 metadata:
   name: manager-role
 rules:
-- apiGroups:
-  - ""
-  resources:
-  - configmaps
-  - secrets
-  - services
-  verbs:
-  - create
-  - delete
-  - get
-  - list
-  - patch
-  - update
-  - watch
-- apiGroups:
-  - ""
-  resources:
-  - pods
-  verbs:
-  - get
-  - list
-  - watch
-- apiGroups:
-  - apps
-  resources:
-  - deployments
-  verbs:
-  - create
-  - delete
-  - get
-  - list
-  - patch
-  - update
-  - watch
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - cacheservers
-  - loraadapters
-  - vllmrouters
-  - vllmruntimes
-  verbs:
-  - create
-  - delete
-  - get
-  - list
-  - patch
-  - update
-  - watch
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - cacheservers/finalizers
-  - loraadapters/finalizers
-  - vllmrouters/finalizers
-  - vllmruntimes/finalizers
-  verbs:
-  - update
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - cacheservers/status
-  - loraadapters/status
-  - vllmrouters/status
-  - vllmruntimes/status
-  verbs:
-  - get
-  - patch
-  - update
+  - apiGroups:
+      - ""
+    resources:
+      - configmaps
+      - secrets
+      - serviceaccounts
+      - services
+    verbs:
+      - create
+      - delete
+      - get
+      - list
+      - patch
+      - update
+      - watch
+  - apiGroups:
+      - ""
+    resources:
+      - pods
+    verbs:
+      - get
+      - list
+      - watch
+  - apiGroups:
+      - apps
+    resources:
+      - deployments
+    verbs:
+      - create
+      - delete
+      - get
+      - list
+      - patch
+      - update
+      - watch
+  - apiGroups:
+      - production-stack.vllm.ai
+    resources:
+      - cacheservers
+      - loraadapters
+      - vllmrouters
+      - vllmruntimes
+    verbs:
+      - create
+      - delete
+      - get
+      - list
+      - patch
+      - update
+      - watch
+  - apiGroups:
+      - production-stack.vllm.ai
+    resources:
+      - cacheservers/finalizers
+      - loraadapters/finalizers
+      - vllmrouters/finalizers
+      - vllmruntimes/finalizers
+    verbs:
+      - update
+  - apiGroups:
+      - production-stack.vllm.ai
+    resources:
+      - cacheservers/status
+      - loraadapters/status
+      - vllmrouters/status
+      - vllmruntimes/status
+    verbs:
+      - get
+      - patch
+      - update
+  - apiGroups:
+      - rbac.authorization.k8s.io
+    resources:
+      - rolebindings
+      - roles
+    verbs:
+      - create
+      - delete
+      - get
+      - list
+      - patch
+      - update
+      - watch
diff --git a/operator/config/rbac/role_binding.yaml b/operator/config/rbac/role_binding.yaml
index b61dbe83f..ab3cc7b55 100644
--- a/operator/config/rbac/role_binding.yaml
+++ b/operator/config/rbac/role_binding.yaml
@@ -10,6 +10,6 @@ roleRef:
   kind: ClusterRole
   name: manager-role
 subjects:
-- kind: ServiceAccount
-  name: controller-manager
-  namespace: system
+  - kind: ServiceAccount
+    name: controller-manager
+    namespace: system
diff --git a/operator/config/rbac/vllmrouter_role_binding.yaml b/operator/config/rbac/vllmrouter_role_binding.yaml
deleted file mode 100644
index 2807c765e..000000000
--- a/operator/config/rbac/vllmrouter_role_binding.yaml
+++ /dev/null
@@ -1,43 +0,0 @@
-apiVersion: rbac.authorization.k8s.io/v1
-kind: Role
-metadata:
-  name: pod-viewer-role
-  namespace: default
-  labels:
-    app.kubernetes.io/name: production-stack
-    app.kubernetes.io/managed-by: kustomize
-rules:
-  - apiGroups:
-      - ""
-    resources:
-      - pods
-    verbs:
-      - get
-      - list
-      - watch
----
-apiVersion: v1
-kind: ServiceAccount
-metadata:
-  name: vllmrouter-sa
-  namespace: default
-  labels:
-    app.kubernetes.io/name: production-stack
-    app.kubernetes.io/managed-by: kustomize
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: RoleBinding
-metadata:
-  name: pod-viewer-binding
-  namespace: default
-  labels:
-    app.kubernetes.io/name: production-stack
-    app.kubernetes.io/managed-by: kustomize
-subjects:
-  - kind: ServiceAccount
-    name: vllmrouter-sa
-    namespace: default
-roleRef:
-  kind: Role
-  name: pod-viewer-role
-  apiGroup: rbac.authorization.k8s.io
diff --git a/operator/internal/controller/vllmrouter_controller.go b/operator/internal/controller/vllmrouter_controller.go
index 3bd198bf7..793ffb0f8 100644
--- a/operator/internal/controller/vllmrouter_controller.go
+++ b/operator/internal/controller/vllmrouter_controller.go
@@ -23,6 +23,7 @@ import (
 
 	appsv1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"
+	rbacv1 "k8s.io/api/rbac/v1"
 	"k8s.io/apimachinery/pkg/api/errors"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -51,6 +52,9 @@ type VLLMRouterReconciler struct {
 // +kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups=core,resources=services,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=roles,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=rolebindings,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=core,resources=serviceaccounts,verbs=get;list;watch;create;update;patch;delete
 
 // Reconcile is part of the main kubernetes reconciliation loop which aims to
 // move the current state of the cluster closer to the desired state.
@@ -72,6 +76,60 @@ func (r *VLLMRouterReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 		return ctrl.Result{}, err
 	}
 
+	// Create ServiceAccount if it doesn't exist
+	sa := &corev1.ServiceAccount{}
+	err = r.Get(ctx, types.NamespacedName{Name: "vllmrouter-sa", Namespace: router.Namespace}, sa)
+	if err != nil && errors.IsNotFound(err) {
+		sa = r.serviceAccountForVLLMRouter(router)
+		log.Info("Creating a new ServiceAccount", "ServiceAccount.Namespace", sa.Namespace, "ServiceAccount.Name", sa.Name)
+		err = r.Create(ctx, sa)
+		if err != nil {
+			log.Error(err, "Failed to create new ServiceAccount", "ServiceAccount.Namespace", sa.Namespace, "ServiceAccount.Name", sa.Name)
+			return ctrl.Result{}, err
+		}
+		// ServiceAccount created successfully - return and requeue
+		return ctrl.Result{Requeue: true}, nil
+	} else if err != nil {
+		log.Error(err, "Failed to get ServiceAccount")
+		return ctrl.Result{}, err
+	}
+
+	// Create Role if it doesn't exist
+	role := &rbacv1.Role{}
+	err = r.Get(ctx, types.NamespacedName{Name: "pod-viewer-role", Namespace: router.Namespace}, role)
+	if err != nil && errors.IsNotFound(err) {
+		role = r.roleForVLLMRouter(router)
+		log.Info("Creating a new Role", "Role.Namespace", role.Namespace, "Role.Name", role.Name)
+		err = r.Create(ctx, role)
+		if err != nil {
+			log.Error(err, "Failed to create new Role", "Role.Namespace", role.Namespace, "Role.Name", role.Name)
+			return ctrl.Result{}, err
+		}
+		// Role created successfully - return and requeue
+		return ctrl.Result{Requeue: true}, nil
+	} else if err != nil {
+		log.Error(err, "Failed to get Role")
+		return ctrl.Result{}, err
+	}
+
+	// Create RoleBinding if it doesn't exist
+	roleBinding := &rbacv1.RoleBinding{}
+	err = r.Get(ctx, types.NamespacedName{Name: "pod-viewer-binding", Namespace: router.Namespace}, roleBinding)
+	if err != nil && errors.IsNotFound(err) {
+		roleBinding = r.roleBindingForVLLMRouter(router)
+		log.Info("Creating a new RoleBinding", "RoleBinding.Namespace", roleBinding.Namespace, "RoleBinding.Name", roleBinding.Name)
+		err = r.Create(ctx, roleBinding)
+		if err != nil {
+			log.Error(err, "Failed to create new RoleBinding", "RoleBinding.Namespace", roleBinding.Namespace, "RoleBinding.Name", roleBinding.Name)
+			return ctrl.Result{}, err
+		}
+		// RoleBinding created successfully - return and requeue
+		return ctrl.Result{Requeue: true}, nil
+	} else if err != nil {
+		log.Error(err, "Failed to get RoleBinding")
+		return ctrl.Result{}, err
+	}
+
 	// Check if the service already exists, if not create a new one
 	foundService := &corev1.Service{}
 	err = r.Get(ctx, types.NamespacedName{Name: router.Name, Namespace: router.Namespace}, foundService)
@@ -380,6 +438,73 @@ func (r *VLLMRouterReconciler) serviceForVLLMRouter(router *servingv1alpha1.VLLM
 	return svc
 }
 
+// serviceAccountForVLLMRouter builds (without creating) the "vllmrouter-sa" ServiceAccount in the router's namespace and sets the router as its controller owner.
+func (r *VLLMRouterReconciler) serviceAccountForVLLMRouter(router *servingv1alpha1.VLLMRouter) *corev1.ServiceAccount {
+	sa := &corev1.ServiceAccount{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "vllmrouter-sa",
+			Namespace: router.Namespace,
+			Labels: map[string]string{
+				"app.kubernetes.io/name":       "production-stack",
+				"app.kubernetes.io/managed-by": "kustomize",
+			},
+		},
+	}
+	ctrl.SetControllerReference(router, sa, r.Scheme) // NOTE(review): the returned error is discarded here — consider surfacing it to the caller
+	return sa
+}
+
+// roleForVLLMRouter builds (without creating) the "pod-viewer-role" Role in the router's namespace, granting get/list/watch on pods, and sets the router as its controller owner.
+func (r *VLLMRouterReconciler) roleForVLLMRouter(router *servingv1alpha1.VLLMRouter) *rbacv1.Role {
+	role := &rbacv1.Role{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "pod-viewer-role",
+			Namespace: router.Namespace,
+			Labels: map[string]string{
+				"app.kubernetes.io/name":       "production-stack",
+				"app.kubernetes.io/managed-by": "kustomize",
+			},
+		},
+		Rules: []rbacv1.PolicyRule{
+			{
+				APIGroups: []string{""}, // "" selects the core API group, where pods live
+				Resources: []string{"pods"},
+				Verbs:     []string{"get", "list", "watch"},
+			},
+		},
+	}
+	ctrl.SetControllerReference(router, role, r.Scheme) // NOTE(review): the returned error is discarded here — consider surfacing it to the caller
+	return role
+}
+
+// roleBindingForVLLMRouter builds (without creating) the "pod-viewer-binding" RoleBinding that binds the "vllmrouter-sa" ServiceAccount to the "pod-viewer-role" Role in the router's namespace.
+func (r *VLLMRouterReconciler) roleBindingForVLLMRouter(router *servingv1alpha1.VLLMRouter) *rbacv1.RoleBinding {
+	roleBinding := &rbacv1.RoleBinding{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "pod-viewer-binding",
+			Namespace: router.Namespace,
+			Labels: map[string]string{
+				"app.kubernetes.io/name":       "production-stack",
+				"app.kubernetes.io/managed-by": "kustomize",
+			},
+		},
+		Subjects: []rbacv1.Subject{
+			{
+				Kind:      "ServiceAccount",
+				Name:      "vllmrouter-sa",
+				Namespace: router.Namespace,
+			},
+		},
+		RoleRef: rbacv1.RoleRef{
+			Kind:     "Role",
+			Name:     "pod-viewer-role", // must equal the Role name set in roleForVLLMRouter
+			APIGroup: "rbac.authorization.k8s.io",
+		},
+	}
+	ctrl.SetControllerReference(router, roleBinding, r.Scheme) // NOTE(review): the returned error is discarded here — consider surfacing it to the caller
+	return roleBinding
+}
+
 // SetupWithManager sets up the controller with the Manager.
 func (r *VLLMRouterReconciler) SetupWithManager(mgr ctrl.Manager) error {
 	return ctrl.NewControllerManagedBy(mgr).

From e984ecd3ac72644ed0fd847f9399f0e98ded7217 Mon Sep 17 00:00:00 2001
From: royyhuang <roy.y.huang@gmail.com>
Date: Fri, 6 Jun 2025 06:07:07 +0000
Subject: [PATCH 11/14] use service account name defined in vllm-router cr to
 create service account instead of hardcoding

Signed-off-by: royyhuang <roy.y.huang@gmail.com>
---
 operator/config/rbac/role.yaml                | 166 +++++++++---------
 .../controller/vllmrouter_controller.go       |   6 +-
 2 files changed, 86 insertions(+), 86 deletions(-)

diff --git a/operator/config/rbac/role.yaml b/operator/config/rbac/role.yaml
index 8803b3f5c..a2f2d5f46 100644
--- a/operator/config/rbac/role.yaml
+++ b/operator/config/rbac/role.yaml
@@ -4,86 +4,86 @@ kind: ClusterRole
 metadata:
   name: manager-role
 rules:
-  - apiGroups:
-      - ""
-    resources:
-      - configmaps
-      - secrets
-      - serviceaccounts
-      - services
-    verbs:
-      - create
-      - delete
-      - get
-      - list
-      - patch
-      - update
-      - watch
-  - apiGroups:
-      - ""
-    resources:
-      - pods
-    verbs:
-      - get
-      - list
-      - watch
-  - apiGroups:
-      - apps
-    resources:
-      - deployments
-    verbs:
-      - create
-      - delete
-      - get
-      - list
-      - patch
-      - update
-      - watch
-  - apiGroups:
-      - production-stack.vllm.ai
-    resources:
-      - cacheservers
-      - loraadapters
-      - vllmrouters
-      - vllmruntimes
-    verbs:
-      - create
-      - delete
-      - get
-      - list
-      - patch
-      - update
-      - watch
-  - apiGroups:
-      - production-stack.vllm.ai
-    resources:
-      - cacheservers/finalizers
-      - loraadapters/finalizers
-      - vllmrouters/finalizers
-      - vllmruntimes/finalizers
-    verbs:
-      - update
-  - apiGroups:
-      - production-stack.vllm.ai
-    resources:
-      - cacheservers/status
-      - loraadapters/status
-      - vllmrouters/status
-      - vllmruntimes/status
-    verbs:
-      - get
-      - patch
-      - update
-  - apiGroups:
-      - rbac.authorization.k8s.io
-    resources:
-      - rolebindings
-      - roles
-    verbs:
-      - create
-      - delete
-      - get
-      - list
-      - patch
-      - update
-      - watch
+- apiGroups:
+  - ""
+  resources:
+  - configmaps
+  - secrets
+  - serviceaccounts
+  - services
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - ""
+  resources:
+  - pods
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - apps
+  resources:
+  - deployments
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - cacheservers
+  - loraadapters
+  - vllmrouters
+  - vllmruntimes
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - cacheservers/finalizers
+  - loraadapters/finalizers
+  - vllmrouters/finalizers
+  - vllmruntimes/finalizers
+  verbs:
+  - update
+- apiGroups:
+  - production-stack.vllm.ai
+  resources:
+  - cacheservers/status
+  - loraadapters/status
+  - vllmrouters/status
+  - vllmruntimes/status
+  verbs:
+  - get
+  - patch
+  - update
+- apiGroups:
+  - rbac.authorization.k8s.io
+  resources:
+  - rolebindings
+  - roles
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
diff --git a/operator/internal/controller/vllmrouter_controller.go b/operator/internal/controller/vllmrouter_controller.go
index 793ffb0f8..91320091c 100644
--- a/operator/internal/controller/vllmrouter_controller.go
+++ b/operator/internal/controller/vllmrouter_controller.go
@@ -78,7 +78,7 @@ func (r *VLLMRouterReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 
 	// Create ServiceAccount if it doesn't exist
 	sa := &corev1.ServiceAccount{}
-	err = r.Get(ctx, types.NamespacedName{Name: "vllmrouter-sa", Namespace: router.Namespace}, sa)
+	err = r.Get(ctx, types.NamespacedName{Name: router.Spec.ServiceAccountName, Namespace: router.Namespace}, sa)
 	if err != nil && errors.IsNotFound(err) {
 		sa = r.serviceAccountForVLLMRouter(router)
 		log.Info("Creating a new ServiceAccount", "ServiceAccount.Namespace", sa.Namespace, "ServiceAccount.Name", sa.Name)
@@ -442,7 +442,7 @@ func (r *VLLMRouterReconciler) serviceForVLLMRouter(router *servingv1alpha1.VLLM
 func (r *VLLMRouterReconciler) serviceAccountForVLLMRouter(router *servingv1alpha1.VLLMRouter) *corev1.ServiceAccount {
 	sa := &corev1.ServiceAccount{
 		ObjectMeta: metav1.ObjectMeta{
-			Name:      "vllmrouter-sa",
+			Name:      router.Spec.ServiceAccountName,
 			Namespace: router.Namespace,
 			Labels: map[string]string{
 				"app.kubernetes.io/name":       "production-stack",
@@ -491,7 +491,7 @@ func (r *VLLMRouterReconciler) roleBindingForVLLMRouter(router *servingv1alpha1.
 		Subjects: []rbacv1.Subject{
 			{
 				Kind:      "ServiceAccount",
-				Name:      "vllmrouter-sa",
+				Name:      router.Spec.ServiceAccountName,
 				Namespace: router.Namespace,
 			},
 		},

From d7fa743c563b52657de65c98681aa9fd757a4d5f Mon Sep 17 00:00:00 2001
From: royyhuang <roy.y.huang@gmail.com>
Date: Sat, 7 Jun 2025 06:05:06 +0000
Subject: [PATCH 12/14] update sample loraadapter manifest

Signed-off-by: royyhuang <roy.y.huang@gmail.com>
---
 .../samples/production-stack_v1alpha1_loraadapter.yaml      | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/operator/config/samples/production-stack_v1alpha1_loraadapter.yaml b/operator/config/samples/production-stack_v1alpha1_loraadapter.yaml
index d83e0a847..960017ab8 100644
--- a/operator/config/samples/production-stack_v1alpha1_loraadapter.yaml
+++ b/operator/config/samples/production-stack_v1alpha1_loraadapter.yaml
@@ -17,9 +17,9 @@ spec:
   # vllmApiKey:
   #   value: "abc123"
   adapterSource:
-    type: "local"  # (local, huggingface, s3) for now we only support local
-    adapterName: "llama-3.1-nemoguard-8b-topic-control"  # This will be the adapter ID
+    type: "local" # one of: local, s3, http, huggingface; only "local" is supported for now
+    adapterName: "llama-3.1-nemoguard-8b-topic-control" # This will be the adapter ID
     adapterPath: "/data/lora-adapters/llama-3.1-nemoguard-8b-topic-control" # This will be the path to the adapter in the persistent volume
-  deploymentConfig:
+  loraAdapterDeploymentConfig:
     algorithm: "default" # for now we only support default algorithm
     replicas: 1 # if not specified, by default algorithm, the lora adapter will be applied to all llama3-8b models, if specified, the lora adapter will only be applied to the specified number of replicas

From fed80a87f4f9dd1ad0467af26d7c8770cc784f99 Mon Sep 17 00:00:00 2001
From: royyhuang <roy.y.huang@gmail.com>
Date: Sat, 7 Jun 2025 06:27:23 +0000
Subject: [PATCH 13/14] update default controller image

Signed-off-by: royyhuang <roy.y.huang@gmail.com>
---
 operator/config/default.yaml                     |  2 +-
 operator/config/default/kustomization.yaml       |  5 +++++
 operator/config/default/manager_image_patch.yaml | 11 +++++++++++
 3 files changed, 17 insertions(+), 1 deletion(-)
 create mode 100644 operator/config/default/manager_image_patch.yaml

diff --git a/operator/config/default.yaml b/operator/config/default.yaml
index 85242a851..5b71fb8ec 100644
--- a/operator/config/default.yaml
+++ b/operator/config/default.yaml
@@ -1403,7 +1403,7 @@ spec:
         - --health-probe-bind-address=:8081
         command:
         - /manager
-        image: 1nfinity/production-stack-controller:latest
+        image: lmcache/production-stack-operator:latest
         imagePullPolicy: Always
         livenessProbe:
           httpGet:
diff --git a/operator/config/default/kustomization.yaml b/operator/config/default/kustomization.yaml
index a4089b35f..b092f90d4 100644
--- a/operator/config/default/kustomization.yaml
+++ b/operator/config/default/kustomization.yaml
@@ -41,6 +41,11 @@ patches:
     target:
       kind: Deployment
 
+  # Set the controller image
+  - path: manager_image_patch.yaml
+    target:
+      kind: Deployment
+
   # Uncomment the patches line if you enable Metrics and CertManager
   # [METRICS-WITH-CERTS] To enable metrics protected with certManager, uncomment the following line.
   # This patch will protect the metrics with certManager self-signed certs.
diff --git a/operator/config/default/manager_image_patch.yaml b/operator/config/default/manager_image_patch.yaml
new file mode 100644
index 000000000..ec444896c
--- /dev/null
+++ b/operator/config/default/manager_image_patch.yaml
@@ -0,0 +1,11 @@
+apiVersion: apps/v1 # Kustomize patch listed in config/default/kustomization.yaml; it sets the controller image.
+kind: Deployment
+metadata:
+  name: controller-manager
+  namespace: system
+spec:
+  template:
+    spec:
+      containers:
+        - name: manager # presumably must match the container name in the manager Deployment — verify against config/manager
+          image: lmcache/production-stack-operator:latest # NOTE(review): floating ":latest" tag — consider pinning a released version

From c5793d33d02a518a1f1b6f5bb05323ea7dda1d04 Mon Sep 17 00:00:00 2001
From: royyhuang <roy.y.huang@gmail.com>
Date: Sat, 7 Jun 2025 06:30:35 +0000
Subject: [PATCH 14/14] remove old image name patch

Signed-off-by: royyhuang <roy.y.huang@gmail.com>
---
 operator/config/default.yaml               | 2229 ++++++++++----------
 operator/config/manager/kustomization.yaml |   10 +-
 2 files changed, 1130 insertions(+), 1109 deletions(-)

diff --git a/operator/config/default.yaml b/operator/config/default.yaml
index 5b71fb8ec..cce3f977a 100644
--- a/operator/config/default.yaml
+++ b/operator/config/default.yaml
@@ -22,103 +22,103 @@ spec:
     singular: cacheserver
   scope: Namespaced
   versions:
-  - additionalPrinterColumns:
-    - jsonPath: .status.status
-      name: Status
-      type: string
-    - jsonPath: .metadata.creationTimestamp
-      name: Age
-      type: date
-    name: v1alpha1
-    schema:
-      openAPIV3Schema:
-        description: CacheServer is the Schema for the cacheservers API
-        properties:
-          apiVersion:
-            description: |-
-              APIVersion defines the versioned schema of this representation of an object.
-              Servers should convert recognized schemas to the latest internal value, and
-              may reject unrecognized values.
-              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
-            type: string
-          kind:
-            description: |-
-              Kind is a string value representing the REST resource this object represents.
-              Servers may infer this from the endpoint the client submits requests to.
-              Cannot be updated.
-              In CamelCase.
-              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
-            type: string
-          metadata:
-            type: object
-          spec:
-            description: CacheServerSpec defines the desired state of CacheServer
-            properties:
-              deploymentStrategy:
-                default: RollingUpdate
-                description: Deployment strategy
-                enum:
-                - RollingUpdate
-                - Recreate
-                type: string
-              image:
-                description: Image configuration for the cache server
-                properties:
-                  name:
-                    type: string
-                  pullPolicy:
-                    type: string
-                  pullSecretName:
-                    type: string
-                  registry:
-                    type: string
-                required:
-                - name
-                - registry
-                type: object
-              port:
-                default: 8000
-                description: Container port for the cache server
-                format: int32
-                type: integer
-              replicas:
-                default: 1
-                description: Number of replicas
-                format: int32
-                type: integer
-              resources:
-                description: Resource requirements
-                properties:
-                  cpu:
-                    type: string
-                  gpu:
-                    type: string
-                  memory:
-                    type: string
-                type: object
-            required:
-            - deploymentStrategy
-            - image
-            - port
-            - replicas
-            - resources
-            type: object
-          status:
-            description: CacheServerStatus defines the observed state of CacheServer
-            properties:
-              lastUpdated:
-                description: Last time the status was updated
-                format: date-time
-                type: string
-              status:
-                description: Current status of the cache server
-                type: string
-            type: object
-        type: object
-    served: true
-    storage: true
-    subresources:
-      status: {}
+    - additionalPrinterColumns:
+        - jsonPath: .status.status
+          name: Status
+          type: string
+        - jsonPath: .metadata.creationTimestamp
+          name: Age
+          type: date
+      name: v1alpha1
+      schema:
+        openAPIV3Schema:
+          description: CacheServer is the Schema for the cacheservers API
+          properties:
+            apiVersion:
+              description: |-
+                APIVersion defines the versioned schema of this representation of an object.
+                Servers should convert recognized schemas to the latest internal value, and
+                may reject unrecognized values.
+                More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+              type: string
+            kind:
+              description: |-
+                Kind is a string value representing the REST resource this object represents.
+                Servers may infer this from the endpoint the client submits requests to.
+                Cannot be updated.
+                In CamelCase.
+                More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+              type: string
+            metadata:
+              type: object
+            spec:
+              description: CacheServerSpec defines the desired state of CacheServer
+              properties:
+                deploymentStrategy:
+                  default: RollingUpdate
+                  description: Deployment strategy
+                  enum:
+                    - RollingUpdate
+                    - Recreate
+                  type: string
+                image:
+                  description: Image configuration for the cache server
+                  properties:
+                    name:
+                      type: string
+                    pullPolicy:
+                      type: string
+                    pullSecretName:
+                      type: string
+                    registry:
+                      type: string
+                  required:
+                    - name
+                    - registry
+                  type: object
+                port:
+                  default: 8000
+                  description: Container port for the cache server
+                  format: int32
+                  type: integer
+                replicas:
+                  default: 1
+                  description: Number of replicas
+                  format: int32
+                  type: integer
+                resources:
+                  description: Resource requirements
+                  properties:
+                    cpu:
+                      type: string
+                    gpu:
+                      type: string
+                    memory:
+                      type: string
+                  type: object
+              required:
+                - deploymentStrategy
+                - image
+                - port
+                - replicas
+                - resources
+              type: object
+            status:
+              description: CacheServerStatus defines the observed state of CacheServer
+              properties:
+                lastUpdated:
+                  description: Last time the status was updated
+                  format: date-time
+                  type: string
+                status:
+                  description: Current status of the cache server
+                  type: string
+              type: object
+          type: object
+      served: true
+      storage: true
+      subresources:
+        status: {}
 ---
 apiVersion: apiextensions.k8s.io/v1
 kind: CustomResourceDefinition
@@ -135,241 +135,260 @@ spec:
     singular: loraadapter
   scope: Namespaced
   versions:
-  - additionalPrinterColumns:
-    - jsonPath: .status.phase
-      name: Phase
-      type: string
-    - jsonPath: .metadata.creationTimestamp
-      name: Age
-      type: date
-    name: v1alpha1
-    schema:
-      openAPIV3Schema:
-        description: LoraAdapter is the Schema for the loraadapters API.
-        properties:
-          apiVersion:
-            description: |-
-              APIVersion defines the versioned schema of this representation of an object.
-              Servers should convert recognized schemas to the latest internal value, and
-              may reject unrecognized values.
-              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
-            type: string
-          kind:
-            description: |-
-              Kind is a string value representing the REST resource this object represents.
-              Servers may infer this from the endpoint the client submits requests to.
-              Cannot be updated.
-              In CamelCase.
-              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
-            type: string
-          metadata:
-            type: object
-          spec:
-            description: LoraAdapterSpec defines the desired state of LoraAdapter.
-            properties:
-              adapterSource:
-                description: AdapterSource defines where to get the LoRA adapter from.
-                properties:
-                  adapterName:
-                    description: AdapterName is the name of the adapter to apply.
-                    type: string
-                  adapterPath:
-                    description: 'AdapterPath is the path to the LoRA adapter weights.
-                      For local sources: required, specifies the path to the adapter
-                      For remote sources: optional, will be updated by the controller
-                      with the download path'
-                    type: string
-                  credentialsSecretRef:
-                    description: CredentialsSecretRef references a secret containing
-                      storage credentials.
-                    properties:
-                      name:
-                        description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
-                          TODO: Add other useful fields. apiVersion, kind, uid?'
-                        type: string
-                    type: object
-                    x-kubernetes-map-type: atomic
-                  maxAdapters:
-                    description: MaxAdapters is the maximum number of adapters to
-                      load.
-                    format: int32
-                    type: integer
-                  pattern:
-                    description: Pattern is the pattern to use for the adapter name.
-                    type: string
-                  repository:
-                    description: Repository is the repository to get the LoRA adapter
-                      from.
-                    type: string
-                  type:
-                    description: Type is the type of the adapter source.
-                    enum:
-                    - local
-                    - s3
-                    - http
-                    - huggingface
-                    type: string
-                required:
-                - adapterName
-                - type
-                type: object
-              baseModel:
-                description: BaseModel is the name of the base model this adapter
-                  is for.
-                type: string
-              loraAdapterDeploymentConfig:
-                description: DeploymentConfig defines how the adapter should be deployed
-                properties:
-                  algorithm:
-                    default: default
-                    description: Algorithm specifies which placement algorithm to
-                      use.
-                    enum:
-                    - default
-                    - ordered
-                    - equalized
-                    type: string
-                  replicas:
-                    description: Replicas is the number of replicas that should load
-                      this adapter.
-                    format: int32
-                    minimum: 0
-                    type: integer
-                required:
-                - algorithm
-                type: object
-              vllmApiKey:
-                description: VLLMApiKey defines the configuration for vLLM API key
-                  authentication
-                properties:
-                  secretRef:
-                    description: Reference to a secret containing the API key
-                    properties:
-                      secretKey:
-                        description: Key in the secret containing the API key
-                        type: string
-                      secretName:
-                        description: Name of the secret
-                        type: string
-                    required:
-                    - secretKey
-                    - secretName
-                    type: object
-                  value:
-                    description: Direct API key value
-                    type: string
-                type: object
-            required:
-            - adapterSource
-            - baseModel
-            type: object
-          status:
-            description: LoraAdapterStatus defines the observed state of LoraAdapter.
-            properties:
-              conditions:
-                description: Condition contains details for one aspect of the current
-                  state of this API Resource.
-                items:
-                  description: Condition contains details for one aspect of the current
-                    state of this API Resource.
+    - additionalPrinterColumns:
+        - jsonPath: .status.phase
+          name: Phase
+          type: string
+        - jsonPath: .metadata.creationTimestamp
+          name: Age
+          type: date
+      name: v1alpha1
+      schema:
+        openAPIV3Schema:
+          description: LoraAdapter is the Schema for the loraadapters API.
+          properties:
+            apiVersion:
+              description: |-
+                APIVersion defines the versioned schema of this representation of an object.
+                Servers should convert recognized schemas to the latest internal value, and
+                may reject unrecognized values.
+                More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+              type: string
+            kind:
+              description: |-
+                Kind is a string value representing the REST resource this object represents.
+                Servers may infer this from the endpoint the client submits requests to.
+                Cannot be updated.
+                In CamelCase.
+                More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+              type: string
+            metadata:
+              type: object
+            spec:
+              description: LoraAdapterSpec defines the desired state of LoraAdapter.
+              properties:
+                adapterSource:
+                  description: AdapterSource defines where to get the LoRA adapter from.
                   properties:
-                    lastTransitionTime:
-                      description: LastTransitionTime is the last time the condition
-                        transitioned from one status to another.
-                      format: date-time
+                    adapterName:
+                      description: AdapterName is the name of the adapter to apply.
                       type: string
-                    message:
-                      description: Message is a human-readable message indicating
-                        details about why the current state is set.
-                      maxLength: 32768
+                    adapterPath:
+                      description:
+                        "AdapterPath is the path to the LoRA adapter weights.
+                        For local sources: required, specifies the path to the adapter
+                        For remote sources: optional, will be updated by the controller
+                        with the download path"
                       type: string
-                    reason:
-                      description: Reason is a brief reason for the condition's current
-                        status.
-                      maxLength: 1024
-                      minLength: 1
-                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+                    credentialsSecretRef:
+                      description:
+                        CredentialsSecretRef references a secret containing
+                        storage credentials.
+                      properties:
+                        name:
+                          description:
+                            "Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+                            TODO: Add other useful fields. apiVersion, kind, uid?"
+                          type: string
+                      type: object
+                      x-kubernetes-map-type: atomic
+                    maxAdapters:
+                      description:
+                        MaxAdapters is the maximum number of adapters to
+                        load.
+                      format: int32
+                      type: integer
+                    pattern:
+                      description: Pattern is the pattern to use for the adapter name.
                       type: string
-                    status:
-                      description: Status is the status of the condition.
-                      enum:
-                      - "True"
-                      - "False"
-                      - Unknown
+                    repository:
+                      description:
+                        Repository is the repository to get the LoRA adapter
+                        from.
                       type: string
                     type:
-                      description: type of condition in CamelCase.
-                      maxLength: 316
-                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+                      description: Type is the type of the adapter source.
+                      enum:
+                        - local
+                        - s3
+                        - http
+                        - huggingface
                       type: string
                   required:
-                  - lastTransitionTime
-                  - message
-                  - reason
-                  - status
-                  - type
+                    - adapterName
+                    - type
                   type: object
-                type: array
-              loadedAdapters:
-                description: LoadedAdapters tracks the loading status of adapters
-                  and their pod assignments.
-                items:
-                  description: LoadedAdapter represents an adapter that has been loaded
-                    into a pod
+                baseModel:
+                  description:
+                    BaseModel is the name of the base model this adapter
+                    is for.
+                  type: string
+                loraAdapterDeploymentConfig:
+                  description: DeploymentConfig defines how the adapter should be deployed
                   properties:
-                    loadTime:
-                      description: LoadTime is when the adapter was loaded
-                      format: date-time
-                      type: string
-                    name:
-                      description: Name is the name of the adapter
-                      type: string
-                    path:
-                      description: Path is the path where the adapter is loaded
+                    algorithm:
+                      default: default
+                      description:
+                        Algorithm specifies which placement algorithm to
+                        use.
+                      enum:
+                        - default
+                        - ordered
+                        - equalized
                       type: string
-                    podAssignments:
-                      description: PodAssignments represents the pods this adapter
-                        has been assigned to
+                    replicas:
+                      description:
+                        Replicas is the number of replicas that should load
+                        this adapter.
+                      format: int32
+                      minimum: 0
+                      type: integer
+                  required:
+                    - algorithm
+                  type: object
+                vllmApiKey:
+                  description:
+                    VLLMApiKey defines the configuration for vLLM API key
+                    authentication
+                  properties:
+                    secretRef:
+                      description: Reference to a secret containing the API key
                       properties:
-                        namespace:
-                          description: Namespace is the namespace of the pod
+                        secretKey:
+                          description: Key in the secret containing the API key
                           type: string
-                        podName:
-                          description: Pod represents the pod information
+                        secretName:
+                          description: Name of the secret
                           type: string
                       required:
-                      - namespace
-                      - podName
+                        - secretKey
+                        - secretName
                       type: object
-                    status:
-                      description: Status is the status of the adapter
+                    value:
+                      description: Direct API key value
                       type: string
-                  required:
-                  - name
-                  - path
-                  - podAssignments
-                  - status
                   type: object
-                type: array
-              message:
-                description: Message provides additional information about the current
-                  phase.
-                type: string
-              observedGeneration:
-                description: ObservedGeneration represents the .metadata.generation
-                  that the condition was set based upon.
-                format: int64
-                minimum: 0
-                type: integer
-              phase:
-                description: Phase represents the current phase of the adapter deployment.
-                type: string
-            type: object
-        type: object
-    served: true
-    storage: true
-    subresources:
-      status: {}
+              required:
+                - adapterSource
+                - baseModel
+              type: object
+            status:
+              description: LoraAdapterStatus defines the observed state of LoraAdapter.
+              properties:
+                conditions:
+                  description:
+                    Condition contains details for one aspect of the current
+                    state of this API Resource.
+                  items:
+                    description:
+                      Condition contains details for one aspect of the current
+                      state of this API Resource.
+                    properties:
+                      lastTransitionTime:
+                        description:
+                          LastTransitionTime is the last time the condition
+                          transitioned from one status to another.
+                        format: date-time
+                        type: string
+                      message:
+                        description:
+                          Message is a human-readable message indicating
+                          details about why the current state is set.
+                        maxLength: 32768
+                        type: string
+                      reason:
+                        description:
+                          Reason is a brief reason for the condition's current
+                          status.
+                        maxLength: 1024
+                        minLength: 1
+                        pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+                        type: string
+                      status:
+                        description: Status is the status of the condition.
+                        enum:
+                          - "True"
+                          - "False"
+                          - Unknown
+                        type: string
+                      type:
+                        description: type of condition in CamelCase.
+                        maxLength: 316
+                        pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+                        type: string
+                    required:
+                      - lastTransitionTime
+                      - message
+                      - reason
+                      - status
+                      - type
+                    type: object
+                  type: array
+                loadedAdapters:
+                  description:
+                    LoadedAdapters tracks the loading status of adapters
+                    and their pod assignments.
+                  items:
+                    description:
+                      LoadedAdapter represents an adapter that has been loaded
+                      into a pod
+                    properties:
+                      loadTime:
+                        description: LoadTime is when the adapter was loaded
+                        format: date-time
+                        type: string
+                      name:
+                        description: Name is the name of the adapter
+                        type: string
+                      path:
+                        description: Path is the path where the adapter is loaded
+                        type: string
+                      podAssignments:
+                        description:
+                          PodAssignments represents the pods this adapter
+                          has been assigned to
+                        properties:
+                          namespace:
+                            description: Namespace is the namespace of the pod
+                            type: string
+                          podName:
+                            description: PodName is the name of the pod
+                            type: string
+                        required:
+                          - namespace
+                          - podName
+                        type: object
+                      status:
+                        description: Status is the status of the adapter
+                        type: string
+                    required:
+                      - name
+                      - path
+                      - podAssignments
+                      - status
+                    type: object
+                  type: array
+                message:
+                  description:
+                    Message provides additional information about the current
+                    phase.
+                  type: string
+                observedGeneration:
+                  description:
+                    ObservedGeneration represents the .metadata.generation
+                    that the condition was set based upon.
+                  format: int64
+                  minimum: 0
+                  type: integer
+                phase:
+                  description: Phase represents the current phase of the adapter deployment.
+                  type: string
+              type: object
+          type: object
+      served: true
+      storage: true
+      subresources:
+        status: {}
 ---
 apiVersion: apiextensions.k8s.io/v1
 kind: CustomResourceDefinition
@@ -386,247 +405,252 @@ spec:
     singular: vllmrouter
   scope: Namespaced
   versions:
-  - name: v1alpha1
-    schema:
-      openAPIV3Schema:
-        description: VLLMRouter is the Schema for the vllmrouters API
-        properties:
-          apiVersion:
-            description: |-
-              APIVersion defines the versioned schema of this representation of an object.
-              Servers should convert recognized schemas to the latest internal value, and
-              may reject unrecognized values.
-              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
-            type: string
-          kind:
-            description: |-
-              Kind is a string value representing the REST resource this object represents.
-              Servers may infer this from the endpoint the client submits requests to.
-              Cannot be updated.
-              In CamelCase.
-              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
-            type: string
-          metadata:
-            type: object
-          spec:
-            description: VLLMRouterSpec defines the desired state of VLLMRouter
-            properties:
-              enableRouter:
-                default: true
-                description: EnableRouter determines if the router should be deployed
-                type: boolean
-              engineScrapeInterval:
-                description: EngineScrapeInterval for collecting engine statistics
-                format: int32
-                type: integer
-              env:
-                description: Environment variables
-                items:
-                  description: EnvVar represents an environment variable
+    - name: v1alpha1
+      schema:
+        openAPIV3Schema:
+          description: VLLMRouter is the Schema for the vllmrouters API
+          properties:
+            apiVersion:
+              description: |-
+                APIVersion defines the versioned schema of this representation of an object.
+                Servers should convert recognized schemas to the latest internal value, and
+                may reject unrecognized values.
+                More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+              type: string
+            kind:
+              description: |-
+                Kind is a string value representing the REST resource this object represents.
+                Servers may infer this from the endpoint the client submits requests to.
+                Cannot be updated.
+                In CamelCase.
+                More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+              type: string
+            metadata:
+              type: object
+            spec:
+              description: VLLMRouterSpec defines the desired state of VLLMRouter
+              properties:
+                enableRouter:
+                  default: true
+                  description: EnableRouter determines if the router should be deployed
+                  type: boolean
+                engineScrapeInterval:
+                  description: EngineScrapeInterval for collecting engine statistics
+                  format: int32
+                  type: integer
+                env:
+                  description: Environment variables
+                  items:
+                    description: EnvVar represents an environment variable
+                    properties:
+                      name:
+                        type: string
+                      value:
+                        type: string
+                    required:
+                      - name
+                      - value
+                    type: object
+                  type: array
+                extraArgs:
+                  description: ExtraArgs for additional router arguments
+                  items:
+                    type: string
+                  type: array
+                image:
+                  description: Image configuration
                   properties:
                     name:
                       type: string
-                    value:
+                    pullPolicy:
+                      type: string
+                    pullSecretName:
+                      type: string
+                    registry:
                       type: string
                   required:
-                  - name
-                  - value
+                    - name
+                    - registry
                   type: object
-                type: array
-              extraArgs:
-                description: ExtraArgs for additional router arguments
-                items:
+                k8sLabelSelector:
+                  description:
+                    K8sLabelSelector specifies the label selector for vLLM
+                    runtime pods when using k8s service discovery
                   type: string
-                type: array
-              image:
-                description: Image configuration
-                properties:
-                  name:
-                    type: string
-                  pullPolicy:
-                    type: string
-                  pullSecretName:
-                    type: string
-                  registry:
-                    type: string
-                required:
-                - name
-                - registry
-                type: object
-              k8sLabelSelector:
-                description: K8sLabelSelector specifies the label selector for vLLM
-                  runtime pods when using k8s service discovery
-                type: string
-              nodeSelectorTerms:
-                description: NodeSelectorTerms for pod scheduling
-                items:
-                  description: |-
-                    A null or empty node selector term matches no objects. The requirements of
-                    them are ANDed.
-                    The TopologySelectorTerm type implements a subset of the NodeSelectorTerm.
-                  properties:
-                    matchExpressions:
-                      description: A list of node selector requirements by node's
-                        labels.
-                      items:
-                        description: |-
-                          A node selector requirement is a selector that contains values, a key, and an operator
-                          that relates the key and values.
-                        properties:
-                          key:
-                            description: The label key that the selector applies to.
-                            type: string
-                          operator:
-                            description: |-
-                              Represents a key's relationship to a set of values.
-                              Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt.
-                            type: string
-                          values:
-                            description: |-
-                              An array of string values. If the operator is In or NotIn,
-                              the values array must be non-empty. If the operator is Exists or DoesNotExist,
-                              the values array must be empty. If the operator is Gt or Lt, the values
-                              array must have a single element, which will be interpreted as an integer.
-                              This array is replaced during a strategic merge patch.
-                            items:
+                nodeSelectorTerms:
+                  description: NodeSelectorTerms for pod scheduling
+                  items:
+                    description: |-
+                      A null or empty node selector term matches no objects. The requirements of
+                      them are ANDed.
+                      The TopologySelectorTerm type implements a subset of the NodeSelectorTerm.
+                    properties:
+                      matchExpressions:
+                        description:
+                          A list of node selector requirements by node's
+                          labels.
+                        items:
+                          description: |-
+                            A node selector requirement is a selector that contains values, a key, and an operator
+                            that relates the key and values.
+                          properties:
+                            key:
+                              description: The label key that the selector applies to.
                               type: string
-                            type: array
-                            x-kubernetes-list-type: atomic
-                        required:
-                        - key
-                        - operator
-                        type: object
-                      type: array
-                      x-kubernetes-list-type: atomic
-                    matchFields:
-                      description: A list of node selector requirements by node's
-                        fields.
-                      items:
-                        description: |-
-                          A node selector requirement is a selector that contains values, a key, and an operator
-                          that relates the key and values.
-                        properties:
-                          key:
-                            description: The label key that the selector applies to.
-                            type: string
-                          operator:
-                            description: |-
-                              Represents a key's relationship to a set of values.
-                              Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt.
-                            type: string
-                          values:
-                            description: |-
-                              An array of string values. If the operator is In or NotIn,
-                              the values array must be non-empty. If the operator is Exists or DoesNotExist,
-                              the values array must be empty. If the operator is Gt or Lt, the values
-                              array must have a single element, which will be interpreted as an integer.
-                              This array is replaced during a strategic merge patch.
-                            items:
+                            operator:
+                              description: |-
+                                Represents a key's relationship to a set of values.
+                                Valid operators are In, NotIn, Exists, DoesNotExist, Gt, and Lt.
                               type: string
-                            type: array
-                            x-kubernetes-list-type: atomic
-                        required:
-                        - key
-                        - operator
-                        type: object
-                      type: array
-                      x-kubernetes-list-type: atomic
+                            values:
+                              description: |-
+                                An array of string values. If the operator is In or NotIn,
+                                the values array must be non-empty. If the operator is Exists or DoesNotExist,
+                                the values array must be empty. If the operator is Gt or Lt, the values
+                                array must have a single element, which will be interpreted as an integer.
+                                This array is replaced during a strategic merge patch.
+                              items:
+                                type: string
+                              type: array
+                              x-kubernetes-list-type: atomic
+                          required:
+                            - key
+                            - operator
+                          type: object
+                        type: array
+                        x-kubernetes-list-type: atomic
+                      matchFields:
+                        description:
+                          A list of node selector requirements by node's
+                          fields.
+                        items:
+                          description: |-
+                            A node selector requirement is a selector that contains values, a key, and an operator
+                            that relates the key and values.
+                          properties:
+                            key:
+                              description: The label key that the selector applies to.
+                              type: string
+                            operator:
+                              description: |-
+                                Represents a key's relationship to a set of values.
+                                Valid operators are In, NotIn, Exists, DoesNotExist, Gt, and Lt.
+                              type: string
+                            values:
+                              description: |-
+                                An array of string values. If the operator is In or NotIn,
+                                the values array must be non-empty. If the operator is Exists or DoesNotExist,
+                                the values array must be empty. If the operator is Gt or Lt, the values
+                                array must have a single element, which will be interpreted as an integer.
+                                This array is replaced during a strategic merge patch.
+                              items:
+                                type: string
+                              type: array
+                              x-kubernetes-list-type: atomic
+                          required:
+                            - key
+                            - operator
+                          type: object
+                        type: array
+                        x-kubernetes-list-type: atomic
+                    type: object
+                    x-kubernetes-map-type: atomic
+                  type: array
+                port:
+                  default: 80
+                  description: ContainerPort for the router service
+                  format: int32
+                  type: integer
+                replicas:
+                  default: 1
+                  description: Replicas specifies the number of router replicas
+                  format: int32
+                  type: integer
+                requestStatsWindow:
+                  description: RequestStatsWindow for request statistics
+                  format: int32
+                  type: integer
+                resources:
+                  description: Resource requirements
+                  properties:
+                    cpu:
+                      type: string
+                    gpu:
+                      type: string
+                    memory:
+                      type: string
+                  type: object
+                routingLogic:
+                  default: roundrobin
+                  description: RoutingLogic specifies the routing strategy
+                  enum:
+                    - roundrobin
+                    - session
+                  type: string
+                serviceAccountName:
+                  description: ServiceAccountName for the router pod
+                  type: string
+                serviceDiscovery:
+                  default: k8s
+                  description:
+                    ServiceDiscovery specifies the service discovery method
+                    (k8s or static)
+                  enum:
+                    - k8s
+                    - static
+                  type: string
+                sessionKey:
+                  default: ""
+                  description: SessionKey for session-based routing
+                  type: string
+                staticBackends:
+                  description:
+                    StaticBackends is required when using static service
+                    discovery
+                  type: string
+                staticModels:
+                  description: StaticModels is required when using static service discovery
+                  type: string
+                vllmApiKeyName:
+                  type: string
+                vllmApiKeySecret:
+                  description: VLLM API Key configuration
+                  properties:
+                    name:
+                      default: ""
+                      description: |-
+                        Name of the referent.
+                        This field is effectively required, but due to backwards compatibility is
+                        allowed to be empty. Instances of this type with an empty value here are
+                        almost certainly wrong.
+                        More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+                      type: string
                   type: object
                   x-kubernetes-map-type: atomic
-                type: array
-              port:
-                default: 80
-                description: ContainerPort for the router service
-                format: int32
-                type: integer
-              replicas:
-                default: 1
-                description: Replicas specifies the number of router replicas
-                format: int32
-                type: integer
-              requestStatsWindow:
-                description: RequestStatsWindow for request statistics
-                format: int32
-                type: integer
-              resources:
-                description: Resource requirements
-                properties:
-                  cpu:
-                    type: string
-                  gpu:
-                    type: string
-                  memory:
-                    type: string
-                type: object
-              routingLogic:
-                default: roundrobin
-                description: RoutingLogic specifies the routing strategy
-                enum:
-                - roundrobin
-                - session
-                type: string
-              serviceAccountName:
-                description: ServiceAccountName for the router pod
-                type: string
-              serviceDiscovery:
-                default: k8s
-                description: ServiceDiscovery specifies the service discovery method
-                  (k8s or static)
-                enum:
-                - k8s
-                - static
-                type: string
-              sessionKey:
-                default: ""
-                description: SessionKey for session-based routing
-                type: string
-              staticBackends:
-                description: StaticBackends is required when using static service
-                  discovery
-                type: string
-              staticModels:
-                description: StaticModels is required when using static service discovery
-                type: string
-              vllmApiKeyName:
-                type: string
-              vllmApiKeySecret:
-                description: VLLM API Key configuration
-                properties:
-                  name:
-                    default: ""
-                    description: |-
-                      Name of the referent.
-                      This field is effectively required, but due to backwards compatibility is
-                      allowed to be empty. Instances of this type with an empty value here are
-                      almost certainly wrong.
-                      More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
-                    type: string
-                type: object
-                x-kubernetes-map-type: atomic
-            required:
-            - image
-            - resources
-            type: object
-          status:
-            description: VLLMRouterStatus defines the observed state of VLLMRouter
-            properties:
-              activeRuntimes:
-                description: Number of active runtimes
-                format: int32
-                type: integer
-              lastUpdated:
-                description: Last updated timestamp
-                format: date-time
-                type: string
-              status:
-                description: Router status
-                type: string
-            type: object
-        type: object
-    served: true
-    storage: true
-    subresources:
-      status: {}
+              required:
+                - image
+                - resources
+              type: object
+            status:
+              description: VLLMRouterStatus defines the observed state of VLLMRouter
+              properties:
+                activeRuntimes:
+                  description: Number of active runtimes
+                  format: int32
+                  type: integer
+                lastUpdated:
+                  description: Last updated timestamp
+                  format: date-time
+                  type: string
+                status:
+                  description: Router status
+                  type: string
+              type: object
+          type: object
+      served: true
+      storage: true
+      subresources:
+        status: {}
 ---
 apiVersion: apiextensions.k8s.io/v1
 kind: CustomResourceDefinition
@@ -641,219 +665,222 @@ spec:
     listKind: VLLMRuntimeList
     plural: vllmruntimes
     shortNames:
-    - vr
+      - vr
     singular: vllmruntime
   scope: Namespaced
   versions:
-  - name: v1alpha1
-    schema:
-      openAPIV3Schema:
-        description: VLLMRuntime is the Schema for the vllmruntimes API
-        properties:
-          apiVersion:
-            description: |-
-              APIVersion defines the versioned schema of this representation of an object.
-              Servers should convert recognized schemas to the latest internal value, and
-              may reject unrecognized values.
-              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
-            type: string
-          kind:
-            description: |-
-              Kind is a string value representing the REST resource this object represents.
-              Servers may infer this from the endpoint the client submits requests to.
-              Cannot be updated.
-              In CamelCase.
-              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
-            type: string
-          metadata:
-            type: object
-          spec:
-            description: VLLMRuntimeSpec defines the desired state of VLLMRuntime
-            properties:
-              deploymentConfig:
-                description: Deployment configuration
-                properties:
-                  deploymentStrategy:
-                    default: RollingUpdate
-                    description: Deploy strategy
-                    enum:
-                    - RollingUpdate
-                    - Recreate
-                    type: string
-                  image:
-                    description: Image configuration
-                    properties:
-                      name:
-                        type: string
-                      pullPolicy:
-                        type: string
-                      pullSecretName:
-                        type: string
-                      registry:
-                        type: string
-                    required:
-                    - name
-                    - registry
-                    type: object
-                  replicas:
-                    default: 1
-                    description: Replicas
-                    format: int32
-                    type: integer
-                  resources:
-                    description: Resource requirements
-                    properties:
-                      cpu:
-                        type: string
-                      gpu:
-                        type: string
-                      memory:
-                        type: string
-                    type: object
-                required:
-                - image
-                - resources
-                type: object
-              lmCacheConfig:
-                description: LM Cache configuration
-                properties:
-                  cpuOffloadingBufferSize:
-                    default: 4Gi
-                    description: CPUOffloadingBufferSize is the size of the CPU offloading
-                      buffer
-                    type: string
-                  diskOffloadingBufferSize:
-                    default: 8Gi
-                    description: DiskOffloadingBufferSize is the size of the disk
-                      offloading buffer
-                    type: string
-                  enabled:
-                    default: false
-                    description: Enabled enables LM Cache
-                    type: boolean
-                  remoteSerde:
-                    description: RemoteSerde is the serialization format for the remote
-                      cache
-                    type: string
-                  remoteUrl:
-                    description: RemoteURL is the URL of the remote cache server
-                    type: string
-                type: object
-              model:
-                description: Model configuration
-                properties:
-                  dtype:
-                    description: Data type
-                    type: string
-                  enableLoRA:
-                    description: Enable LoRA
-                    type: boolean
-                  enableTool:
-                    description: Enable tool
-                    type: boolean
-                  hfTokenName:
-                    default: token
-                    type: string
-                  hfTokenSecret:
-                    description: HuggingFace token secret
-                    properties:
-                      name:
-                        default: ""
-                        description: |-
-                          Name of the referent.
-                          This field is effectively required, but due to backwards compatibility is
-                          allowed to be empty. Instances of this type with an empty value here are
-                          almost certainly wrong.
-                          More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
-                        type: string
-                    type: object
-                    x-kubernetes-map-type: atomic
-                  maxModelLen:
-                    description: Maximum model length
-                    format: int32
-                    type: integer
-                  maxNumSeqs:
-                    description: Maximum number of sequences
-                    format: int32
-                    type: integer
-                  modelURL:
-                    description: Model URL
-                    type: string
-                  toolCallParser:
-                    description: Tool call parser
-                    type: string
-                required:
-                - modelURL
-                type: object
-              vllmConfig:
-                description: vLLM server configuration
-                properties:
-                  enableChunkedPrefill:
-                    description: Enable chunked prefill
-                    type: boolean
-                  enablePrefixCaching:
-                    description: Enable prefix caching
-                    type: boolean
-                  env:
-                    description: Environment variables
-                    items:
-                      description: EnvVar represents an environment variable
+    - name: v1alpha1
+      schema:
+        openAPIV3Schema:
+          description: VLLMRuntime is the Schema for the vllmruntimes API
+          properties:
+            apiVersion:
+              description: |-
+                APIVersion defines the versioned schema of this representation of an object.
+                Servers should convert recognized schemas to the latest internal value, and
+                may reject unrecognized values.
+                More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+              type: string
+            kind:
+              description: |-
+                Kind is a string value representing the REST resource this object represents.
+                Servers may infer this from the endpoint the client submits requests to.
+                Cannot be updated.
+                In CamelCase.
+                More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+              type: string
+            metadata:
+              type: object
+            spec:
+              description: VLLMRuntimeSpec defines the desired state of VLLMRuntime
+              properties:
+                deploymentConfig:
+                  description: Deployment configuration
+                  properties:
+                    deploymentStrategy:
+                      default: RollingUpdate
+                      description: Deploy strategy
+                      enum:
+                        - RollingUpdate
+                        - Recreate
+                      type: string
+                    image:
+                      description: Image configuration
                       properties:
                         name:
                           type: string
-                        value:
+                        pullPolicy:
+                          type: string
+                        pullSecretName:
+                          type: string
+                        registry:
                           type: string
                       required:
-                      - name
-                      - value
+                        - name
+                        - registry
+                      type: object
+                    replicas:
+                      default: 1
+                      description: Replicas
+                      format: int32
+                      type: integer
+                    resources:
+                      description: Resource requirements
+                      properties:
+                        cpu:
+                          type: string
+                        gpu:
+                          type: string
+                        memory:
+                          type: string
                       type: object
-                    type: array
-                  extraArgs:
-                    description: Extra arguments for vllm serve
-                    items:
+                  required:
+                    - image
+                    - resources
+                  type: object
+                lmCacheConfig:
+                  description: LM Cache configuration
+                  properties:
+                    cpuOffloadingBufferSize:
+                      default: 4Gi
+                      description:
+                        CPUOffloadingBufferSize is the size of the CPU offloading
+                        buffer
                       type: string
-                    type: array
-                  gpuMemoryUtilization:
-                    description: GPU memory utilization
-                    type: string
-                  maxLoras:
-                    description: Maximum number of LoRAs
-                    format: int32
-                    type: integer
-                  port:
-                    default: 8000
-                    description: Port for vLLM server
-                    format: int32
-                    type: integer
-                  tensorParallelSize:
-                    description: Tensor parallel size
-                    format: int32
-                    type: integer
-                  v1:
-                    description: Use V1 API
-                    type: boolean
-                type: object
-            required:
-            - deploymentConfig
-            - model
-            - vllmConfig
-            type: object
-          status:
-            description: VLLMRuntimeStatus defines the observed state of VLLMRuntime
-            properties:
-              lastUpdated:
-                description: Last updated timestamp
-                format: date-time
-                type: string
-              modelStatus:
-                description: Model status
-                type: string
-            type: object
-        type: object
-    served: true
-    storage: true
-    subresources:
-      status: {}
+                    diskOffloadingBufferSize:
+                      default: 8Gi
+                      description:
+                        DiskOffloadingBufferSize is the size of the disk
+                        offloading buffer
+                      type: string
+                    enabled:
+                      default: false
+                      description: Enabled enables LM Cache
+                      type: boolean
+                    remoteSerde:
+                      description:
+                        RemoteSerde is the serialization format for the remote
+                        cache
+                      type: string
+                    remoteUrl:
+                      description: RemoteURL is the URL of the remote cache server
+                      type: string
+                  type: object
+                model:
+                  description: Model configuration
+                  properties:
+                    dtype:
+                      description: Data type
+                      type: string
+                    enableLoRA:
+                      description: Enable LoRA
+                      type: boolean
+                    enableTool:
+                      description: Enable tool
+                      type: boolean
+                    hfTokenName:
+                      default: token
+                      type: string
+                    hfTokenSecret:
+                      description: HuggingFace token secret
+                      properties:
+                        name:
+                          default: ""
+                          description: |-
+                            Name of the referent.
+                            This field is effectively required, but due to backwards compatibility is
+                            allowed to be empty. Instances of this type with an empty value here are
+                            almost certainly wrong.
+                            More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+                          type: string
+                      type: object
+                      x-kubernetes-map-type: atomic
+                    maxModelLen:
+                      description: Maximum model length
+                      format: int32
+                      type: integer
+                    maxNumSeqs:
+                      description: Maximum number of sequences
+                      format: int32
+                      type: integer
+                    modelURL:
+                      description: Model URL
+                      type: string
+                    toolCallParser:
+                      description: Tool call parser
+                      type: string
+                  required:
+                    - modelURL
+                  type: object
+                vllmConfig:
+                  description: vLLM server configuration
+                  properties:
+                    enableChunkedPrefill:
+                      description: Enable chunked prefill
+                      type: boolean
+                    enablePrefixCaching:
+                      description: Enable prefix caching
+                      type: boolean
+                    env:
+                      description: Environment variables
+                      items:
+                        description: EnvVar represents an environment variable
+                        properties:
+                          name:
+                            type: string
+                          value:
+                            type: string
+                        required:
+                          - name
+                          - value
+                        type: object
+                      type: array
+                    extraArgs:
+                      description: Extra arguments for vllm serve
+                      items:
+                        type: string
+                      type: array
+                    gpuMemoryUtilization:
+                      description: GPU memory utilization
+                      type: string
+                    maxLoras:
+                      description: Maximum number of LoRAs
+                      format: int32
+                      type: integer
+                    port:
+                      default: 8000
+                      description: Port for vLLM server
+                      format: int32
+                      type: integer
+                    tensorParallelSize:
+                      description: Tensor parallel size
+                      format: int32
+                      type: integer
+                    v1:
+                      description: Use V1 API
+                      type: boolean
+                  type: object
+              required:
+                - deploymentConfig
+                - model
+                - vllmConfig
+              type: object
+            status:
+              description: VLLMRuntimeStatus defines the observed state of VLLMRuntime
+              properties:
+                lastUpdated:
+                  description: Last updated timestamp
+                  format: date-time
+                  type: string
+                modelStatus:
+                  description: Model status
+                  type: string
+              type: object
+          type: object
+      served: true
+      storage: true
+      subresources:
+        status: {}
 ---
 apiVersion: v1
 kind: ServiceAccount
@@ -873,37 +900,37 @@ metadata:
   name: production-stack-leader-election-role
   namespace: production-stack-system
 rules:
-- apiGroups:
-  - ""
-  resources:
-  - configmaps
-  verbs:
-  - get
-  - list
-  - watch
-  - create
-  - update
-  - patch
-  - delete
-- apiGroups:
-  - coordination.k8s.io
-  resources:
-  - leases
-  verbs:
-  - get
-  - list
-  - watch
-  - create
-  - update
-  - patch
-  - delete
-- apiGroups:
-  - ""
-  resources:
-  - events
-  verbs:
-  - create
-  - patch
+  - apiGroups:
+      - ""
+    resources:
+      - configmaps
+    verbs:
+      - get
+      - list
+      - watch
+      - create
+      - update
+      - patch
+      - delete
+  - apiGroups:
+      - coordination.k8s.io
+    resources:
+      - leases
+    verbs:
+      - get
+      - list
+      - watch
+      - create
+      - update
+      - patch
+      - delete
+  - apiGroups:
+      - ""
+    resources:
+      - events
+    verbs:
+      - create
+      - patch
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
@@ -913,18 +940,18 @@ metadata:
     app.kubernetes.io/name: production-stack
   name: production-stack-cacheserver-admin-role
 rules:
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - cacheservers
-  verbs:
-  - '*'
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - cacheservers/status
-  verbs:
-  - get
+  - apiGroups:
+      - production-stack.vllm.ai
+    resources:
+      - cacheservers
+    verbs:
+      - "*"
+  - apiGroups:
+      - production-stack.vllm.ai
+    resources:
+      - cacheservers/status
+    verbs:
+      - get
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
@@ -934,24 +961,24 @@ metadata:
     app.kubernetes.io/name: production-stack
   name: production-stack-cacheserver-editor-role
 rules:
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - cacheservers
-  verbs:
-  - create
-  - delete
-  - get
-  - list
-  - patch
-  - update
-  - watch
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - cacheservers/status
-  verbs:
-  - get
+  - apiGroups:
+      - production-stack.vllm.ai
+    resources:
+      - cacheservers
+    verbs:
+      - create
+      - delete
+      - get
+      - list
+      - patch
+      - update
+      - watch
+  - apiGroups:
+      - production-stack.vllm.ai
+    resources:
+      - cacheservers/status
+    verbs:
+      - get
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
@@ -961,20 +988,20 @@ metadata:
     app.kubernetes.io/name: production-stack
   name: production-stack-cacheserver-viewer-role
 rules:
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - cacheservers
-  verbs:
-  - get
-  - list
-  - watch
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - cacheservers/status
-  verbs:
-  - get
+  - apiGroups:
+      - production-stack.vllm.ai
+    resources:
+      - cacheservers
+    verbs:
+      - get
+      - list
+      - watch
+  - apiGroups:
+      - production-stack.vllm.ai
+    resources:
+      - cacheservers/status
+    verbs:
+      - get
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
@@ -984,18 +1011,18 @@ metadata:
     app.kubernetes.io/name: production-stack
   name: production-stack-loraadapter-admin-role
 rules:
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - loraadapters
-  verbs:
-  - '*'
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - loraadapters/status
-  verbs:
-  - get
+  - apiGroups:
+      - production-stack.vllm.ai
+    resources:
+      - loraadapters
+    verbs:
+      - "*"
+  - apiGroups:
+      - production-stack.vllm.ai
+    resources:
+      - loraadapters/status
+    verbs:
+      - get
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
@@ -1005,24 +1032,24 @@ metadata:
     app.kubernetes.io/name: production-stack
   name: production-stack-loraadapter-editor-role
 rules:
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - loraadapters
-  verbs:
-  - create
-  - delete
-  - get
-  - list
-  - patch
-  - update
-  - watch
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - loraadapters/status
-  verbs:
-  - get
+  - apiGroups:
+      - production-stack.vllm.ai
+    resources:
+      - loraadapters
+    verbs:
+      - create
+      - delete
+      - get
+      - list
+      - patch
+      - update
+      - watch
+  - apiGroups:
+      - production-stack.vllm.ai
+    resources:
+      - loraadapters/status
+    verbs:
+      - get
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
@@ -1032,137 +1059,137 @@ metadata:
     app.kubernetes.io/name: production-stack
   name: production-stack-loraadapter-viewer-role
 rules:
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - loraadapters
-  verbs:
-  - get
-  - list
-  - watch
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - loraadapters/status
-  verbs:
-  - get
+  - apiGroups:
+      - production-stack.vllm.ai
+    resources:
+      - loraadapters
+    verbs:
+      - get
+      - list
+      - watch
+  - apiGroups:
+      - production-stack.vllm.ai
+    resources:
+      - loraadapters/status
+    verbs:
+      - get
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
 metadata:
   name: production-stack-manager-role
 rules:
-- apiGroups:
-  - ""
-  resources:
-  - configmaps
-  - secrets
-  - serviceaccounts
-  - services
-  verbs:
-  - create
-  - delete
-  - get
-  - list
-  - patch
-  - update
-  - watch
-- apiGroups:
-  - ""
-  resources:
-  - pods
-  verbs:
-  - get
-  - list
-  - watch
-- apiGroups:
-  - apps
-  resources:
-  - deployments
-  verbs:
-  - create
-  - delete
-  - get
-  - list
-  - patch
-  - update
-  - watch
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - cacheservers
-  - loraadapters
-  - vllmrouters
-  - vllmruntimes
-  verbs:
-  - create
-  - delete
-  - get
-  - list
-  - patch
-  - update
-  - watch
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - cacheservers/finalizers
-  - loraadapters/finalizers
-  - vllmrouters/finalizers
-  - vllmruntimes/finalizers
-  verbs:
-  - update
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - cacheservers/status
-  - loraadapters/status
-  - vllmrouters/status
-  - vllmruntimes/status
-  verbs:
-  - get
-  - patch
-  - update
-- apiGroups:
-  - rbac.authorization.k8s.io
-  resources:
-  - rolebindings
-  - roles
-  verbs:
-  - create
-  - delete
-  - get
-  - list
-  - patch
-  - update
-  - watch
+  - apiGroups:
+      - ""
+    resources:
+      - configmaps
+      - secrets
+      - serviceaccounts
+      - services
+    verbs:
+      - create
+      - delete
+      - get
+      - list
+      - patch
+      - update
+      - watch
+  - apiGroups:
+      - ""
+    resources:
+      - pods
+    verbs:
+      - get
+      - list
+      - watch
+  - apiGroups:
+      - apps
+    resources:
+      - deployments
+    verbs:
+      - create
+      - delete
+      - get
+      - list
+      - patch
+      - update
+      - watch
+  - apiGroups:
+      - production-stack.vllm.ai
+    resources:
+      - cacheservers
+      - loraadapters
+      - vllmrouters
+      - vllmruntimes
+    verbs:
+      - create
+      - delete
+      - get
+      - list
+      - patch
+      - update
+      - watch
+  - apiGroups:
+      - production-stack.vllm.ai
+    resources:
+      - cacheservers/finalizers
+      - loraadapters/finalizers
+      - vllmrouters/finalizers
+      - vllmruntimes/finalizers
+    verbs:
+      - update
+  - apiGroups:
+      - production-stack.vllm.ai
+    resources:
+      - cacheservers/status
+      - loraadapters/status
+      - vllmrouters/status
+      - vllmruntimes/status
+    verbs:
+      - get
+      - patch
+      - update
+  - apiGroups:
+      - rbac.authorization.k8s.io
+    resources:
+      - rolebindings
+      - roles
+    verbs:
+      - create
+      - delete
+      - get
+      - list
+      - patch
+      - update
+      - watch
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
 metadata:
   name: production-stack-metrics-auth-role
 rules:
-- apiGroups:
-  - authentication.k8s.io
-  resources:
-  - tokenreviews
-  verbs:
-  - create
-- apiGroups:
-  - authorization.k8s.io
-  resources:
-  - subjectaccessreviews
-  verbs:
-  - create
+  - apiGroups:
+      - authentication.k8s.io
+    resources:
+      - tokenreviews
+    verbs:
+      - create
+  - apiGroups:
+      - authorization.k8s.io
+    resources:
+      - subjectaccessreviews
+    verbs:
+      - create
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
 metadata:
   name: production-stack-metrics-reader
 rules:
-- nonResourceURLs:
-  - /metrics
-  verbs:
-  - get
+  - nonResourceURLs:
+      - /metrics
+    verbs:
+      - get
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
@@ -1172,18 +1199,18 @@ metadata:
     app.kubernetes.io/name: production-stack
   name: production-stack-vllmrouter-admin-role
 rules:
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - vllmrouters
-  verbs:
-  - '*'
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - vllmrouters/status
-  verbs:
-  - get
+  - apiGroups:
+      - production-stack.vllm.ai
+    resources:
+      - vllmrouters
+    verbs:
+      - "*"
+  - apiGroups:
+      - production-stack.vllm.ai
+    resources:
+      - vllmrouters/status
+    verbs:
+      - get
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
@@ -1193,24 +1220,24 @@ metadata:
     app.kubernetes.io/name: production-stack
   name: production-stack-vllmrouter-editor-role
 rules:
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - vllmrouters
-  verbs:
-  - create
-  - delete
-  - get
-  - list
-  - patch
-  - update
-  - watch
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - vllmrouters/status
-  verbs:
-  - get
+  - apiGroups:
+      - production-stack.vllm.ai
+    resources:
+      - vllmrouters
+    verbs:
+      - create
+      - delete
+      - get
+      - list
+      - patch
+      - update
+      - watch
+  - apiGroups:
+      - production-stack.vllm.ai
+    resources:
+      - vllmrouters/status
+    verbs:
+      - get
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
@@ -1220,20 +1247,20 @@ metadata:
     app.kubernetes.io/name: production-stack
   name: production-stack-vllmrouter-viewer-role
 rules:
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - vllmrouters
-  verbs:
-  - get
-  - list
-  - watch
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - vllmrouters/status
-  verbs:
-  - get
+  - apiGroups:
+      - production-stack.vllm.ai
+    resources:
+      - vllmrouters
+    verbs:
+      - get
+      - list
+      - watch
+  - apiGroups:
+      - production-stack.vllm.ai
+    resources:
+      - vllmrouters/status
+    verbs:
+      - get
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
@@ -1243,18 +1270,18 @@ metadata:
     app.kubernetes.io/name: production-stack
   name: production-stack-vllmruntime-admin-role
 rules:
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - vllmruntimes
-  verbs:
-  - '*'
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - vllmruntimes/status
-  verbs:
-  - get
+  - apiGroups:
+      - production-stack.vllm.ai
+    resources:
+      - vllmruntimes
+    verbs:
+      - "*"
+  - apiGroups:
+      - production-stack.vllm.ai
+    resources:
+      - vllmruntimes/status
+    verbs:
+      - get
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
@@ -1264,24 +1291,24 @@ metadata:
     app.kubernetes.io/name: production-stack
   name: production-stack-vllmruntime-editor-role
 rules:
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - vllmruntimes
-  verbs:
-  - create
-  - delete
-  - get
-  - list
-  - patch
-  - update
-  - watch
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - vllmruntimes/status
-  verbs:
-  - get
+  - apiGroups:
+      - production-stack.vllm.ai
+    resources:
+      - vllmruntimes
+    verbs:
+      - create
+      - delete
+      - get
+      - list
+      - patch
+      - update
+      - watch
+  - apiGroups:
+      - production-stack.vllm.ai
+    resources:
+      - vllmruntimes/status
+    verbs:
+      - get
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
@@ -1291,20 +1318,20 @@ metadata:
     app.kubernetes.io/name: production-stack
   name: production-stack-vllmruntime-viewer-role
 rules:
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - vllmruntimes
-  verbs:
-  - get
-  - list
-  - watch
-- apiGroups:
-  - production-stack.vllm.ai
-  resources:
-  - vllmruntimes/status
-  verbs:
-  - get
+  - apiGroups:
+      - production-stack.vllm.ai
+    resources:
+      - vllmruntimes
+    verbs:
+      - get
+      - list
+      - watch
+  - apiGroups:
+      - production-stack.vllm.ai
+    resources:
+      - vllmruntimes/status
+    verbs:
+      - get
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: RoleBinding
@@ -1319,9 +1346,9 @@ roleRef:
   kind: Role
   name: production-stack-leader-election-role
 subjects:
-- kind: ServiceAccount
-  name: production-stack-controller-manager
-  namespace: production-stack-system
+  - kind: ServiceAccount
+    name: production-stack-controller-manager
+    namespace: production-stack-system
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRoleBinding
@@ -1335,9 +1362,9 @@ roleRef:
   kind: ClusterRole
   name: production-stack-manager-role
 subjects:
-- kind: ServiceAccount
-  name: production-stack-controller-manager
-  namespace: production-stack-system
+  - kind: ServiceAccount
+    name: production-stack-controller-manager
+    namespace: production-stack-system
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRoleBinding
@@ -1348,9 +1375,9 @@ roleRef:
   kind: ClusterRole
   name: production-stack-metrics-auth-role
 subjects:
-- kind: ServiceAccount
-  name: production-stack-controller-manager
-  namespace: production-stack-system
+  - kind: ServiceAccount
+    name: production-stack-controller-manager
+    namespace: production-stack-system
 ---
 apiVersion: v1
 kind: Service
@@ -1363,10 +1390,10 @@ metadata:
   namespace: production-stack-system
 spec:
   ports:
-  - name: https
-    port: 8443
-    protocol: TCP
-    targetPort: 8443
+    - name: https
+      port: 8443
+      protocol: TCP
+      targetPort: 8443
   selector:
     app.kubernetes.io/name: production-stack
     control-plane: controller-manager
@@ -1397,41 +1424,41 @@ spec:
         app.kubernetes.io/name: production-stack
     spec:
       containers:
-      - args:
-        - --metrics-bind-address=:8443
-        - --leader-elect
-        - --health-probe-bind-address=:8081
-        command:
-        - /manager
-        image: lmcache/production-stack-operator:latest
-        imagePullPolicy: Always
-        livenessProbe:
-          httpGet:
-            path: /healthz
-            port: 8081
-          initialDelaySeconds: 15
-          periodSeconds: 20
-        name: manager
-        ports: []
-        readinessProbe:
-          httpGet:
-            path: /readyz
-            port: 8081
-          initialDelaySeconds: 5
-          periodSeconds: 10
-        resources:
-          limits:
-            cpu: 500m
-            memory: 128Mi
-          requests:
-            cpu: 10m
-            memory: 64Mi
-        securityContext:
-          allowPrivilegeEscalation: false
-          capabilities:
-            drop:
-            - ALL
-        volumeMounts: []
+        - args:
+            - --metrics-bind-address=:8443
+            - --leader-elect
+            - --health-probe-bind-address=:8081
+          command:
+            - /manager
+          image: lmcache/production-stack-operator:latest
+          imagePullPolicy: Always
+          livenessProbe:
+            httpGet:
+              path: /healthz
+              port: 8081
+            initialDelaySeconds: 15
+            periodSeconds: 20
+          name: manager
+          ports: []
+          readinessProbe:
+            httpGet:
+              path: /readyz
+              port: 8081
+            initialDelaySeconds: 5
+            periodSeconds: 10
+          resources:
+            limits:
+              cpu: 500m
+              memory: 128Mi
+            requests:
+              cpu: 10m
+              memory: 64Mi
+          securityContext:
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop:
+                - ALL
+          volumeMounts: []
       securityContext:
         runAsNonRoot: true
         seccompProfile:
diff --git a/operator/config/manager/kustomization.yaml b/operator/config/manager/kustomization.yaml
index cc9f03e24..ac10fc9f6 100644
--- a/operator/config/manager/kustomization.yaml
+++ b/operator/config/manager/kustomization.yaml
@@ -1,9 +1,3 @@
 resources:
-- namespace.yaml
-- deployment.yaml
-apiVersion: kustomize.config.k8s.io/v1beta1
-kind: Kustomization
-images:
-- name: controller
-  newName: 1nfinity/production-stack-controller
-  newTag: latest
+  - namespace.yaml
+  - deployment.yaml