diff --git a/.github/workflows/push-charts.yaml b/.github/workflows/push-charts.yaml
index 7c3a14b1..70cbd76c 100644
--- a/.github/workflows/push-charts.yaml
+++ b/.github/workflows/push-charts.yaml
@@ -80,3 +80,21 @@ jobs:
CHART_PACKAGE=$(ls $CHART_DIR/*.tgz)
helm push $CHART_PACKAGE oci://${{ env.REGISTRY }}/${{ github.repository }}/charts/
done
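+      # The steps below mirror the chart packaging above for the decisions
+      # chart: it is only repackaged and pushed when its Chart.yaml changes.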
+ - name: Get all changed decisions Chart.yaml files
+ id: changed-chart-yaml-files-decisions
+ uses: tj-actions/changed-files@v47
+ with:
+ files: |
+ decisions/dist/chart/Chart.yaml
+ - name: Push decisions charts to registry
+ if: steps.changed-chart-yaml-files-decisions.outputs.all_changed_files != ''
+ shell: bash
+ env:
+ ALL_CHANGED_FILES: ${{ steps.changed-chart-yaml-files-decisions.outputs.all_changed_files }}
+ run: |
+ for CHART_FILE in ${ALL_CHANGED_FILES}; do
+ CHART_DIR=$(dirname $CHART_FILE)
+ helm package $CHART_DIR --dependency-update --destination $CHART_DIR
+ CHART_PACKAGE=$(ls $CHART_DIR/*.tgz)
+ helm push $CHART_PACKAGE oci://${{ env.REGISTRY }}/${{ github.repository }}/charts/
+ done
diff --git a/.github/workflows/push-images.yaml b/.github/workflows/push-images.yaml
index 43abfbe7..cd9437ba 100644
--- a/.github/workflows/push-images.yaml
+++ b/.github/workflows/push-images.yaml
@@ -139,3 +139,44 @@ jobs:
subject-name: ${{ env.REGISTRY }}/${{ github.repository }}-reservations-operator
subject-digest: ${{ steps.push_cortex_reservations.outputs.digest }}
push-to-registry: true
+ # Only build and push the decisions operator image if there are changes
+ # in the decisions directory.
+ - name: Get all changed decisions/ files
+ id: changed_decisions_files
+ uses: tj-actions/changed-files@v47
+ with:
+ files: |
+ decisions/**
+ - name: Docker Meta (Cortex Decisions)
+ if: steps.changed_decisions_files.outputs.all_changed_files != ''
+ id: meta_cortex_decisions
+ uses: docker/metadata-action@v5
+ with:
+ images: ${{ env.REGISTRY }}/${{ github.repository }}-decisions-operator
+ tags: |
+ type=semver,pattern={{version}}
+ type=semver,pattern={{major}}.{{minor}}
+ type=sha
+ latest
+ - name: Build and Push Cortex Decisions Operator
+ if: steps.changed_decisions_files.outputs.all_changed_files != ''
+ id: push_cortex_decisions
+ uses: docker/build-push-action@v6
+ with:
+ context: .
+ file: Dockerfile.kubebuilder
+ platforms: linux/amd64,linux/arm64
+ push: true
+ tags: ${{ steps.meta_cortex_decisions.outputs.tags }}
+ labels: ${{ steps.meta_cortex_decisions.outputs.labels }}
+ build-args: |
+ GO_MOD_PATH=decisions
+ GIT_TAG=${{ github.ref_name }}
+ GIT_COMMIT=${{ github.sha }}
+ - name: Generate Artifact Attestation for Cortex Decisions
+ if: steps.changed_decisions_files.outputs.all_changed_files != ''
+ uses: actions/attest-build-provenance@v3
+ with:
+ subject-name: ${{ env.REGISTRY }}/${{ github.repository }}-decisions-operator
+ subject-digest: ${{ steps.push_cortex_decisions.outputs.digest }}
+ push-to-registry: true
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 9888dff8..79833951 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -25,6 +25,8 @@ jobs:
go test -v ./...
echo "Testing reservations module..."
cd reservations && go test -v ./...
+ echo "Testing decisions module..."
+ cd ../decisions && go test -v ./...
test-with-docker:
# We don't need to run this longer test if the previous one already failed.
@@ -61,6 +63,14 @@ jobs:
-coverprofile=reservations_profile.cov ./internal/...
go tool cover -func reservations_profile.cov > reservations_func_coverage.txt
cd ..
+
+ echo "Running tests for decisions module..."
+ cd decisions
+ go test -v \
+ -coverpkg=./internal/... \
+ -coverprofile=decisions_profile.cov ./internal/...
+ go tool cover -func decisions_profile.cov > decisions_func_coverage.txt
+ cd ..
- name: Upload coverage files
uses: actions/upload-artifact@v4
with:
@@ -68,6 +78,7 @@ jobs:
path: |
pr_func_coverage.txt
reservations/reservations_func_coverage.txt
+ decisions/decisions_func_coverage.txt
# Steps below are only executed if the workflow is triggered by a pull request
- name: Delete old coverage comments (PR only)
if: ${{ github.event_name == 'pull_request' }}
@@ -123,6 +134,19 @@ jobs:
reservationsCoverageReport = 'No coverage data available';
}
+ // Read decisions module coverage report
+ let decisionsCoverageReport = '';
+ let decisionsCoveragePercentage = 'unknown';
+ try {
+ decisionsCoverageReport = fs.readFileSync('decisions/decisions_func_coverage.txt', 'utf8');
+ const decisionsLines = decisionsCoverageReport.trim().split('\n');
+ const decisionsLastLine = decisionsLines[decisionsLines.length - 1];
+ const decisionsCoverageMatch = decisionsLastLine.match(/total:\s+\(statements\)\s+(\d+\.\d+)%/);
+ decisionsCoveragePercentage = decisionsCoverageMatch ? decisionsCoverageMatch[1] : 'unknown';
+ } catch (error) {
+ decisionsCoverageReport = 'No coverage data available';
+ }
+
let commentBody = '\n';
commentBody += '## Test Coverage Report\n\n';
@@ -144,6 +168,16 @@ jobs:
commentBody += '```text\n';
commentBody += reservationsCoverageReport;
commentBody += '```\n';
+ commentBody += '\n\n';
+
+ // Decisions module coverage
+ commentBody += '\n';
+ commentBody += 'Coverage in decisions module (decisions/internal/): ';
+ commentBody += decisionsCoveragePercentage;
+ commentBody += '%\n\n';
+ commentBody += '```text\n';
+ commentBody += decisionsCoverageReport;
+ commentBody += '```\n';
commentBody += ' \n';
// Post the comment
diff --git a/.github/workflows/update-appversion.yml b/.github/workflows/update-appversion.yml
index f11e8980..dd4c3c51 100644
--- a/.github/workflows/update-appversion.yml
+++ b/.github/workflows/update-appversion.yml
@@ -30,6 +30,13 @@ jobs:
files: |
postgres/**
+ - name: Get all changed decisions/ files
+ id: changed_decisions_files
+ uses: tj-actions/changed-files@v47
+ with:
+ files: |
+ decisions/**
+
# Always bumped
- name: Update appVersion in cortex-core Chart.yaml
run: |
@@ -69,3 +76,17 @@ jobs:
git add reservations/dist/chart/Chart.yaml
git commit -m "Bump cortex-reservations chart appVersions to ${{ steps.vars.outputs.sha }} [skip ci]" || echo "No changes to commit"
git push origin HEAD:main
+
+ # Only bumped if there are changes in the decisions directory.
+ - name: Update appVersion in cortex-decisions Chart.yaml
+ if: steps.changed_decisions_files.outputs.all_changed_files != ''
+ run: |
+ sed -i 's/^\([ ]*appVersion:[ ]*\).*/\1"${{ steps.vars.outputs.sha }}"/' decisions/dist/chart/Chart.yaml
+ - name: Commit and push changes for cortex-decisions
+ if: steps.changed_decisions_files.outputs.all_changed_files != ''
+ run: |
+ git config user.name "github-actions[bot]"
+ git config user.email "github-actions[bot]@users.noreply.github.com"
+ git add decisions/dist/chart/Chart.yaml
+ git commit -m "Bump cortex-decisions chart appVersions to ${{ steps.vars.outputs.sha }} [skip ci]" || echo "No changes to commit"
+ git push origin HEAD:main
diff --git a/Tiltfile b/Tiltfile
index 9d0fec1a..d0ada3c1 100644
--- a/Tiltfile
+++ b/Tiltfile
@@ -37,12 +37,22 @@ def kubebuilder_binary_files(path):
docker_build('ghcr.io/cobaltcore-dev/cortex-reservations-operator', '.',
dockerfile='Dockerfile.kubebuilder',
build_args={'GO_MOD_PATH': 'reservations'},
- only=kubebuilder_binary_files('reservations') + ['internal/', 'go.mod', 'go.sum'],
+ only=kubebuilder_binary_files('reservations') + ['internal/', 'decisions/', 'go.mod', 'go.sum'],
)
local('sh helm/sync.sh reservations/dist/chart')
k8s_yaml(helm('reservations/dist/chart', name='cortex-reservations', values=[tilt_values]))
k8s_resource('reservations-controller-manager', labels=['Reservations'])
+########### Decisions Operator & CRDs
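+# Build and deploy the decisions operator and its CRDs, mirroring the
+# reservations setup above.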
+docker_build('ghcr.io/cobaltcore-dev/cortex-decisions-operator', '.',
+ dockerfile='Dockerfile.kubebuilder',
+ build_args={'GO_MOD_PATH': 'decisions'},
+ only=kubebuilder_binary_files('decisions') + ['internal/', 'go.mod', 'go.sum'],
+)
+local('sh helm/sync.sh decisions/dist/chart')
+k8s_yaml(helm('decisions/dist/chart', name='cortex-decisions', values=[tilt_values]))
+k8s_resource('decisions-controller-manager', labels=['Decisions'])
+
########### Dev Dependencies
local('sh helm/sync.sh helm/dev/cortex-prometheus-operator')
k8s_yaml(helm('./helm/dev/cortex-prometheus-operator', name='cortex-prometheus-operator')) # Operator
@@ -82,6 +92,7 @@ k8s_resource('cortex-plutono', port_forwards=[
docker_build('ghcr.io/cobaltcore-dev/cortex', '.', only=[
'internal/', 'commands/', 'main.go', 'go.mod', 'go.sum', 'Makefile',
'reservations/api/', # API module of the reservations operator needed for the scheduler.
+ 'decisions/api/', # API module of the decisions operator needed for the scheduler.
])
docker_build('ghcr.io/cobaltcore-dev/cortex-postgres', 'postgres')
diff --git a/commands/checks/nova/checks.go b/commands/checks/nova/checks.go
index a0d8916f..11027589 100644
--- a/commands/checks/nova/checks.go
+++ b/commands/checks/nova/checks.go
@@ -271,6 +271,7 @@ func randomRequest(dc datacenter, seed int) api.ExternalSchedulerRequest {
slog.Info("using flavor extra specs", "extraSpecs", extraSpecs)
request := api.ExternalSchedulerRequest{
Spec: api.NovaObject[api.NovaSpec]{Data: api.NovaSpec{
+ InstanceUUID: "cortex-e2e-tests",
AvailabilityZone: az,
ProjectID: project.ID,
Flavor: api.NovaObject[api.NovaFlavor]{Data: api.NovaFlavor{
diff --git a/decisions/LICENSE b/decisions/LICENSE
new file mode 100644
index 00000000..06c1fb23
--- /dev/null
+++ b/decisions/LICENSE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright 2024 SAP SE or an SAP affiliate company and cobaltcore-dev contributors
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/decisions/Makefile b/decisions/Makefile
new file mode 100644
index 00000000..4d454b5b
--- /dev/null
+++ b/decisions/Makefile
@@ -0,0 +1,58 @@
+.PHONY: all
+all: build
+
+.PHONY: manifests
+manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.
+ $(CONTROLLER_GEN) rbac:roleName=manager-role crd:allowDangerousTypes=true webhook paths="./..." output:crd:artifacts:config=config/crd/bases
+
+.PHONY: generate
+generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
+ $(CONTROLLER_GEN) crd:allowDangerousTypes=true object:headerFile="hack/boilerplate.go.txt" paths="./..."
+
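+# Remove the GitHub workflow scaffolding generated by kubebuilder; the root
+# repository carries the actual workflows.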
+.PHONY: cleanup
+cleanup:
+ rm -rf ./.github
+
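+# Generate the Helm chart under dist/chart from the kustomize config using
+# kubebuilder's helm/v1-alpha plugin.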
+.PHONY: dekustomize
+dekustomize:
+ kubebuilder edit --plugins=helm/v1-alpha
+
+##@ Build
+
+.PHONY: build
+build: manifests generate dekustomize cleanup
+
+LOCALBIN ?= $(shell pwd)/bin
+$(LOCALBIN):
+ mkdir -p $(LOCALBIN)
+CONTROLLER_GEN ?= $(LOCALBIN)/controller-gen
+
+CONTROLLER_TOOLS_VERSION ?= v0.17.2
+
+.PHONY: controller-gen
+controller-gen: $(CONTROLLER_GEN) ## Download controller-gen locally if necessary.
+$(CONTROLLER_GEN): $(LOCALBIN)
+ $(call go-install-tool,$(CONTROLLER_GEN),sigs.k8s.io/controller-tools/cmd/controller-gen,$(CONTROLLER_TOOLS_VERSION))
+
+# Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set)
+ifeq (,$(shell go env GOBIN))
+GOBIN=$(shell go env GOPATH)/bin
+else
+GOBIN=$(shell go env GOBIN)
+endif
+
+# go-install-tool will 'go install' any package with custom target and name of binary, if it doesn't exist
+# $1 - target path with name of binary
+# $2 - package url which can be installed
+# $3 - specific version of package
+define go-install-tool
+@[ -f "$(1)-$(3)" ] || { \
+set -e; \
+package=$(2)@$(3) ;\
+echo "Downloading $${package}" ;\
+rm -f $(1) || true ;\
+GOBIN=$(LOCALBIN) go install $${package} ;\
+mv $(1) $(1)-$(3) ;\
+} ;\
+ln -sf $(1)-$(3) $(1)
+endef
\ No newline at end of file
diff --git a/decisions/PROJECT b/decisions/PROJECT
new file mode 100644
index 00000000..edd787f2
--- /dev/null
+++ b/decisions/PROJECT
@@ -0,0 +1,22 @@
+# Code generated by tool. DO NOT EDIT.
+# This file is used to track the info used to scaffold your project
+# and allow the plugins properly work.
+# More info: https://book.kubebuilder.io/reference/project-config.html
+cliVersion: 4.7.1
+domain: cortex
+layout:
+- go.kubebuilder.io/v4
+plugins:
+ helm.kubebuilder.io/v1-alpha: {}
+projectName: decisions
+repo: github.com/cobaltcore-dev/cortex/decisions
+resources:
+- api:
+ crdVersion: v1
+ controller: true
+ domain: cortex
+ group: decisions
+ kind: SchedulingDecision
+ path: github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1
+ version: v1alpha1
+version: "3"
diff --git a/decisions/api/LICENSE b/decisions/api/LICENSE
new file mode 100644
index 00000000..06c1fb23
--- /dev/null
+++ b/decisions/api/LICENSE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright 2024 SAP SE or an SAP affiliate company and cobaltcore-dev contributors
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/decisions/api/go.mod b/decisions/api/go.mod
new file mode 100644
index 00000000..db86f9fe
--- /dev/null
+++ b/decisions/api/go.mod
@@ -0,0 +1,27 @@
+module github.com/cobaltcore-dev/cortex/decisions/api
+
+go 1.25.0
+
+require (
+ k8s.io/apimachinery v0.34.1
+ sigs.k8s.io/controller-runtime v0.22.1
+)
+
+require (
+ github.com/fxamacker/cbor/v2 v2.9.0 // indirect
+ github.com/go-logr/logr v1.4.2 // indirect
+ github.com/gogo/protobuf v1.3.2 // indirect
+ github.com/json-iterator/go v1.1.12 // indirect
+ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
+ github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect
+ github.com/x448/float16 v0.8.4 // indirect
+ go.yaml.in/yaml/v2 v2.4.2 // indirect
+ golang.org/x/net v0.38.0 // indirect
+ golang.org/x/text v0.23.0 // indirect
+ gopkg.in/inf.v0 v0.9.1 // indirect
+ k8s.io/klog/v2 v2.130.1 // indirect
+ k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 // indirect
+ sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect
+ sigs.k8s.io/randfill v1.0.0 // indirect
+ sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect
+)
diff --git a/decisions/api/go.sum b/decisions/api/go.sum
new file mode 100644
index 00000000..edd5a267
--- /dev/null
+++ b/decisions/api/go.sum
@@ -0,0 +1,101 @@
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM=
+github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ=
+github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
+github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
+github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
+github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
+github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
+github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
+github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
+github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
+github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
+github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgYQBbFN4U4JNXUNYpxael3UzMyo=
+github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144=
+github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
+github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
+github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
+github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
+github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
+github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8=
+github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
+github.com/onsi/ginkgo/v2 v2.22.0 h1:Yed107/8DjTr0lKCNt7Dn8yQ6ybuDRQoMGrNFKzMfHg=
+github.com/onsi/ginkgo/v2 v2.22.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo=
+github.com/onsi/gomega v1.36.1 h1:bJDPBO7ibjxcbHMgSCoo4Yj18UWbKDlLwX1x9sybDcw=
+github.com/onsi/gomega v1.36.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o=
+github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
+github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
+github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
+github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI=
+go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
+golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8=
+golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik=
+golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY=
+golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
+golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
+golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ=
+golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
+gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+k8s.io/api v0.34.0 h1:L+JtP2wDbEYPUeNGbeSa/5GwFtIA662EmT2YSLOkAVE=
+k8s.io/api v0.34.0/go.mod h1:YzgkIzOOlhl9uwWCZNqpw6RJy9L2FK4dlJeayUoydug=
+k8s.io/apimachinery v0.34.1 h1:dTlxFls/eikpJxmAC7MVE8oOeP1zryV7iRyIjB0gky4=
+k8s.io/apimachinery v0.34.1/go.mod h1:/GwIlEcWuTX9zKIg2mbw0LRFIsXwrfoVxn+ef0X13lw=
+k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
+k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
+k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 h1:hwvWFiBzdWw1FhfY1FooPn3kzWuJ8tmbZBHi4zVsl1Y=
+k8s.io/utils v0.0.0-20250604170112-4c0f3b243397/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
+sigs.k8s.io/controller-runtime v0.22.1 h1:Ah1T7I+0A7ize291nJZdS1CabF/lB4E++WizgV24Eqg=
+sigs.k8s.io/controller-runtime v0.22.1/go.mod h1:FwiwRjkRPbiN+zp2QRp7wlTCzbUXxZ/D4OzuQUDwBHY=
+sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE=
+sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg=
+sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU=
+sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
+sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco=
+sigs.k8s.io/structured-merge-diff/v6 v6.3.0/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE=
+sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs=
+sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4=
diff --git a/decisions/api/v1alpha1/groupversion_info.go b/decisions/api/v1alpha1/groupversion_info.go
new file mode 100644
index 00000000..8d38e963
--- /dev/null
+++ b/decisions/api/v1alpha1/groupversion_info.go
@@ -0,0 +1,23 @@
+// Copyright 2025 SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+// Package v1alpha1 contains API Schema definitions for the decisions v1alpha1 API group.
+// +kubebuilder:object:generate=true
+// +groupName=decisions.cortex
+package v1alpha1
+
+import (
+ "k8s.io/apimachinery/pkg/runtime/schema"
+ "sigs.k8s.io/controller-runtime/pkg/scheme"
+)
+
+var (
+ // GroupVersion is group version used to register these objects.
+ GroupVersion = schema.GroupVersion{Group: "decisions.cortex", Version: "v1alpha1"}
+
+ // SchemeBuilder is used to add go types to the GroupVersionKind scheme.
+ SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}
+
+ // AddToScheme adds the types in this group-version to the given scheme.
+ AddToScheme = SchemeBuilder.AddToScheme
+)
diff --git a/decisions/api/v1alpha1/schedulingdecision_types.go b/decisions/api/v1alpha1/schedulingdecision_types.go
new file mode 100644
index 00000000..3eb62ef3
--- /dev/null
+++ b/decisions/api/v1alpha1/schedulingdecision_types.go
@@ -0,0 +1,119 @@
+// Copyright 2025 SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package v1alpha1
+
+import (
+ "k8s.io/apimachinery/pkg/api/resource"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
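+// SchedulingEventType identifies the kind of scheduling event that led to
+// a decision, such as an initial placement or a live migration.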
+type SchedulingEventType string
+
+const (
+ SchedulingEventTypeLiveMigration SchedulingEventType = "live-migration"
+ // SchedulingEventTypeColdMigration SchedulingEventType = "cold-migration"
+ // SchedulingEventTypeEvacuation SchedulingEventType = "evacuation"
+ SchedulingEventTypeResize SchedulingEventType = "resize"
+ SchedulingEventTypeInitialPlacement SchedulingEventType = "initial-placement"
+)
+
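+// SchedulingDecisionPipelineOutputSpec holds the output of a single pipeline
+// step, i.e. the activation values the step produced per host.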
+type SchedulingDecisionPipelineOutputSpec struct {
+ Step string `json:"step"`
+ Activations map[string]float64 `json:"activations,omitempty"`
+}
+
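+// SchedulingDecisionPipelineSpec names the scheduler pipeline and collects
+// the outputs of its steps.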
+type SchedulingDecisionPipelineSpec struct {
+ Name string `json:"name"`
+ Outputs []SchedulingDecisionPipelineOutputSpec `json:"outputs,omitempty"`
+}
+
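+// Flavor describes the requested flavor by name together with its
+// resource requests.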
+type Flavor struct {
+ Name string `json:"name"`
+ Resources map[string]resource.Quantity `json:"requests,omitempty"`
+}
+
+// SchedulingDecisionSpec defines the desired state of SchedulingDecision.
+type SchedulingDecisionSpec struct {
+ // Decisions is the list of scheduling decisions to be processed.
+ Decisions []SchedulingDecisionRequest `json:"decisions"`
+}
+
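+// SchedulingDecisionRequest is a single scheduling request together with the
+// pipeline outputs that led to the decision.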
+type SchedulingDecisionRequest struct {
+ ID string `json:"id"`
+ RequestedAt metav1.Time `json:"requestedAt"`
+ EventType SchedulingEventType `json:"eventType"`
+ Input map[string]float64 `json:"input,omitempty"`
+ Pipeline SchedulingDecisionPipelineSpec `json:"pipeline"`
+
+ AvailabilityZone string `json:"availabilityZone,omitempty"`
+
+ Flavor Flavor `json:"flavor,omitempty"`
+}
+
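+// SchedulingDecisionState describes whether a decision was resolved
+// successfully or ended in an error.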
+type SchedulingDecisionState string
+
+const (
+ SchedulingDecisionStateResolved SchedulingDecisionState = "resolved"
+ SchedulingDecisionStateError SchedulingDecisionState = "error"
+)
+
+// SchedulingDecisionResult represents the result of processing a single decision request.
+type SchedulingDecisionResult struct {
+ ID string `json:"id"`
+ Description string `json:"description,omitempty"`
+ // Final scores for each host after processing all pipeline steps.
+ FinalScores map[string]float64 `json:"finalScores,omitempty"`
+ // Hosts that were deleted during pipeline processing and all steps that attempted to delete them.
+ DeletedHosts map[string][]string `json:"deletedHosts,omitempty"`
+}
+
+// SchedulingDecisionStatus defines the observed state of SchedulingDecision.
+type SchedulingDecisionStatus struct {
+ State SchedulingDecisionState `json:"state,omitempty"`
+ Error string `json:"error,omitempty"`
+
+ DecisionCount int `json:"decisionCount,omitempty"`
+ GlobalDescription string `json:"globalDescription,omitempty"`
+
+ Results []SchedulingDecisionResult `json:"results,omitempty"`
+}
+
+// +kubebuilder:object:root=true
+// +kubebuilder:subresource:status
+// +kubebuilder:resource:scope=Cluster,shortName=sdec;sdecs
+// +kubebuilder:printcolumn:name="State",type="string",JSONPath=".status.state"
+// +kubebuilder:printcolumn:name="Error",type="string",JSONPath=".status.error"
+// +kubebuilder:printcolumn:name="Created",type="date",JSONPath=".metadata.creationTimestamp"
+// +kubebuilder:printcolumn:name="Decisions",type="integer",JSONPath=".status.decisionCount"
+// +kubebuilder:printcolumn:name="Latest Event",type="string",JSONPath=".spec.decisions[-1].eventType"
+// +kubebuilder:printcolumn:name="Description",type="string",JSONPath=".status.globalDescription"
+
+// SchedulingDecision is the Schema for the schedulingdecisions API
+type SchedulingDecision struct {
+ metav1.TypeMeta `json:",inline"`
+
+ // metadata is a standard object metadata
+ // +optional
+ metav1.ObjectMeta `json:"metadata,omitempty,omitzero"`
+
+ // spec defines the desired state of SchedulingDecision
+ // +required
+ Spec SchedulingDecisionSpec `json:"spec"`
+
+ // status defines the observed state of SchedulingDecision
+ // +optional
+ Status SchedulingDecisionStatus `json:"status,omitempty,omitzero"`
+}
+
+// +kubebuilder:object:root=true
+
+// SchedulingDecisionList contains a list of SchedulingDecision
+type SchedulingDecisionList struct {
+ metav1.TypeMeta `json:",inline"`
+ metav1.ListMeta `json:"metadata,omitempty"`
+ Items []SchedulingDecision `json:"items"`
+}
+
+func init() {
+ SchemeBuilder.Register(&SchedulingDecision{}, &SchedulingDecisionList{})
+}
diff --git a/decisions/api/v1alpha1/zz_generated.deepcopy.go b/decisions/api/v1alpha1/zz_generated.deepcopy.go
new file mode 100644
index 00000000..2b061852
--- /dev/null
+++ b/decisions/api/v1alpha1/zz_generated.deepcopy.go
@@ -0,0 +1,245 @@
+//go:build !ignore_autogenerated
+
+// Copyright 2025 SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+// Code generated by controller-gen. DO NOT EDIT.
+
+package v1alpha1
+
+import (
+ "k8s.io/apimachinery/pkg/api/resource"
+ runtime "k8s.io/apimachinery/pkg/runtime"
+)
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *Flavor) DeepCopyInto(out *Flavor) {
+ *out = *in
+ if in.Resources != nil {
+ in, out := &in.Resources, &out.Resources
+ *out = make(map[string]resource.Quantity, len(*in))
+ for key, val := range *in {
+ (*out)[key] = val.DeepCopy()
+ }
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Flavor.
+func (in *Flavor) DeepCopy() *Flavor {
+ if in == nil {
+ return nil
+ }
+ out := new(Flavor)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *SchedulingDecision) DeepCopyInto(out *SchedulingDecision) {
+ *out = *in
+ out.TypeMeta = in.TypeMeta
+ in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
+ in.Spec.DeepCopyInto(&out.Spec)
+ in.Status.DeepCopyInto(&out.Status)
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingDecision.
+func (in *SchedulingDecision) DeepCopy() *SchedulingDecision {
+ if in == nil {
+ return nil
+ }
+ out := new(SchedulingDecision)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *SchedulingDecision) DeepCopyObject() runtime.Object {
+ if c := in.DeepCopy(); c != nil {
+ return c
+ }
+ return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *SchedulingDecisionList) DeepCopyInto(out *SchedulingDecisionList) {
+ *out = *in
+ out.TypeMeta = in.TypeMeta
+ in.ListMeta.DeepCopyInto(&out.ListMeta)
+ if in.Items != nil {
+ in, out := &in.Items, &out.Items
+ *out = make([]SchedulingDecision, len(*in))
+ for i := range *in {
+ (*in)[i].DeepCopyInto(&(*out)[i])
+ }
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingDecisionList.
+func (in *SchedulingDecisionList) DeepCopy() *SchedulingDecisionList {
+ if in == nil {
+ return nil
+ }
+ out := new(SchedulingDecisionList)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *SchedulingDecisionList) DeepCopyObject() runtime.Object {
+ if c := in.DeepCopy(); c != nil {
+ return c
+ }
+ return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *SchedulingDecisionPipelineOutputSpec) DeepCopyInto(out *SchedulingDecisionPipelineOutputSpec) {
+ *out = *in
+ if in.Activations != nil {
+ in, out := &in.Activations, &out.Activations
+ *out = make(map[string]float64, len(*in))
+ for key, val := range *in {
+ (*out)[key] = val
+ }
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingDecisionPipelineOutputSpec.
+func (in *SchedulingDecisionPipelineOutputSpec) DeepCopy() *SchedulingDecisionPipelineOutputSpec {
+ if in == nil {
+ return nil
+ }
+ out := new(SchedulingDecisionPipelineOutputSpec)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *SchedulingDecisionPipelineSpec) DeepCopyInto(out *SchedulingDecisionPipelineSpec) {
+ *out = *in
+ if in.Outputs != nil {
+ in, out := &in.Outputs, &out.Outputs
+ *out = make([]SchedulingDecisionPipelineOutputSpec, len(*in))
+ for i := range *in {
+ (*in)[i].DeepCopyInto(&(*out)[i])
+ }
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingDecisionPipelineSpec.
+func (in *SchedulingDecisionPipelineSpec) DeepCopy() *SchedulingDecisionPipelineSpec {
+ if in == nil {
+ return nil
+ }
+ out := new(SchedulingDecisionPipelineSpec)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *SchedulingDecisionRequest) DeepCopyInto(out *SchedulingDecisionRequest) {
+ *out = *in
+ in.RequestedAt.DeepCopyInto(&out.RequestedAt)
+ if in.Input != nil {
+ in, out := &in.Input, &out.Input
+ *out = make(map[string]float64, len(*in))
+ for key, val := range *in {
+ (*out)[key] = val
+ }
+ }
+ in.Pipeline.DeepCopyInto(&out.Pipeline)
+ in.Flavor.DeepCopyInto(&out.Flavor)
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingDecisionRequest.
+func (in *SchedulingDecisionRequest) DeepCopy() *SchedulingDecisionRequest {
+ if in == nil {
+ return nil
+ }
+ out := new(SchedulingDecisionRequest)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *SchedulingDecisionResult) DeepCopyInto(out *SchedulingDecisionResult) {
+ *out = *in
+ if in.FinalScores != nil {
+ in, out := &in.FinalScores, &out.FinalScores
+ *out = make(map[string]float64, len(*in))
+ for key, val := range *in {
+ (*out)[key] = val
+ }
+ }
+ if in.DeletedHosts != nil {
+ in, out := &in.DeletedHosts, &out.DeletedHosts
+ *out = make(map[string][]string, len(*in))
+ for key, val := range *in {
+ var outVal []string
+ if val == nil {
+ (*out)[key] = nil
+ } else {
+ inVal := (*in)[key]
+ in, out := &inVal, &outVal
+ *out = make([]string, len(*in))
+ copy(*out, *in)
+ }
+ (*out)[key] = outVal
+ }
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingDecisionResult.
+func (in *SchedulingDecisionResult) DeepCopy() *SchedulingDecisionResult {
+ if in == nil {
+ return nil
+ }
+ out := new(SchedulingDecisionResult)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *SchedulingDecisionSpec) DeepCopyInto(out *SchedulingDecisionSpec) {
+ *out = *in
+ if in.Decisions != nil {
+ in, out := &in.Decisions, &out.Decisions
+ *out = make([]SchedulingDecisionRequest, len(*in))
+ for i := range *in {
+ (*in)[i].DeepCopyInto(&(*out)[i])
+ }
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingDecisionSpec.
+func (in *SchedulingDecisionSpec) DeepCopy() *SchedulingDecisionSpec {
+ if in == nil {
+ return nil
+ }
+ out := new(SchedulingDecisionSpec)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *SchedulingDecisionStatus) DeepCopyInto(out *SchedulingDecisionStatus) {
+ *out = *in
+ if in.Results != nil {
+ in, out := &in.Results, &out.Results
+ *out = make([]SchedulingDecisionResult, len(*in))
+ for i := range *in {
+ (*in)[i].DeepCopyInto(&(*out)[i])
+ }
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingDecisionStatus.
+func (in *SchedulingDecisionStatus) DeepCopy() *SchedulingDecisionStatus {
+ if in == nil {
+ return nil
+ }
+ out := new(SchedulingDecisionStatus)
+ in.DeepCopyInto(out)
+ return out
+}
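
These generated DeepCopy helpers exist because a shallow struct copy still aliases map and slice fields, which is unsafe for objects served from the controller-runtime cache. A minimal sketch of the difference, assuming the Go field names implied by the schema (`ID` is assumed from the CRD's required `id`; `FinalScores` is confirmed by the deepcopy body above):

```go
package main

import (
	"fmt"

	decisionsv1alpha1 "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1"
)

func main() {
	orig := &decisionsv1alpha1.SchedulingDecisionResult{
		ID:          "req-1", // field name assumed from the CRD's required `id`
		FinalScores: map[string]float64{"host-a": 1.0},
	}

	// A plain struct copy shares the underlying map with the original...
	shallow := *orig
	shallow.FinalScores["host-a"] = -1.0
	fmt.Println(orig.FinalScores["host-a"]) // -1: the original was mutated

	// ...while DeepCopy (generated above) allocates fresh maps.
	orig.FinalScores["host-a"] = 1.0
	deep := orig.DeepCopy()
	deep.FinalScores["host-a"] = -1.0
	fmt.Println(orig.FinalScores["host-a"]) // 1: the original is untouched
}
```
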
diff --git a/decisions/cmd/main.go b/decisions/cmd/main.go
new file mode 100644
index 00000000..6298e6d4
--- /dev/null
+++ b/decisions/cmd/main.go
@@ -0,0 +1,242 @@
+// Copyright 2025 SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package main
+
+import (
+ "crypto/tls"
+ "flag"
+ "os"
+ "path/filepath"
+
+ // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
+ // to ensure that exec-entrypoint and run can make use of them.
+ _ "k8s.io/client-go/plugin/pkg/client/auth"
+
+ "k8s.io/apimachinery/pkg/runtime"
+ utilruntime "k8s.io/apimachinery/pkg/util/runtime"
+ clientgoscheme "k8s.io/client-go/kubernetes/scheme"
+ ctrl "sigs.k8s.io/controller-runtime"
+ "sigs.k8s.io/controller-runtime/pkg/certwatcher"
+ "sigs.k8s.io/controller-runtime/pkg/healthz"
+ "sigs.k8s.io/controller-runtime/pkg/log/zap"
+ "sigs.k8s.io/controller-runtime/pkg/metrics/filters"
+ metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
+ "sigs.k8s.io/controller-runtime/pkg/webhook"
+
+ decisionsv1alpha1 "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1"
+ "github.com/cobaltcore-dev/cortex/decisions/internal/controller"
+ "github.com/cobaltcore-dev/cortex/internal/conf"
+ // +kubebuilder:scaffold:imports
+)
+
+var (
+ scheme = runtime.NewScheme()
+ setupLog = ctrl.Log.WithName("setup")
+)
+
+func init() {
+ utilruntime.Must(clientgoscheme.AddToScheme(scheme))
+
+ utilruntime.Must(decisionsv1alpha1.AddToScheme(scheme))
+ // +kubebuilder:scaffold:scheme
+}
+
+// nolint:gocyclo
+func main() {
+ var metricsAddr string
+ var metricsCertPath, metricsCertName, metricsCertKey string
+ var webhookCertPath, webhookCertName, webhookCertKey string
+ var enableLeaderElection bool
+ var probeAddr string
+ var secureMetrics bool
+ var enableHTTP2 bool
+ var tlsOpts []func(*tls.Config)
+ flag.StringVar(&metricsAddr, "metrics-bind-address", "0", "The address the metrics endpoint binds to. "+
+ "Use :8443 for HTTPS or :8080 for HTTP, or leave as 0 to disable the metrics service.")
+ flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
+ flag.BoolVar(&enableLeaderElection, "leader-elect", false,
+ "Enable leader election for controller manager. "+
+ "Enabling this will ensure there is only one active controller manager.")
+ flag.BoolVar(&secureMetrics, "metrics-secure", true,
+ "If set, the metrics endpoint is served securely via HTTPS. Use --metrics-secure=false to use HTTP instead.")
+ flag.StringVar(&webhookCertPath, "webhook-cert-path", "", "The directory that contains the webhook certificate.")
+ flag.StringVar(&webhookCertName, "webhook-cert-name", "tls.crt", "The name of the webhook certificate file.")
+ flag.StringVar(&webhookCertKey, "webhook-cert-key", "tls.key", "The name of the webhook key file.")
+ flag.StringVar(&metricsCertPath, "metrics-cert-path", "",
+ "The directory that contains the metrics server certificate.")
+ flag.StringVar(&metricsCertName, "metrics-cert-name", "tls.crt", "The name of the metrics server certificate file.")
+ flag.StringVar(&metricsCertKey, "metrics-cert-key", "tls.key", "The name of the metrics server key file.")
+ flag.BoolVar(&enableHTTP2, "enable-http2", false,
+ "If set, HTTP/2 will be enabled for the metrics and webhook servers")
+ opts := zap.Options{
+ Development: true,
+ }
+ opts.BindFlags(flag.CommandLine)
+ flag.Parse()
+
+ ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))
+
+ // If the enable-http2 flag is false (the default), http/2 should be disabled
+ // due to its vulnerabilities. More specifically, disabling http/2 will
+ // prevent the servers from being vulnerable to the HTTP/2 Stream Cancellation
+ // and Rapid Reset CVEs. For more information see:
+ // - https://github.com/advisories/GHSA-qppj-fm5r-hxr3
+ // - https://github.com/advisories/GHSA-4374-p667-p6c8
+ disableHTTP2 := func(c *tls.Config) {
+ setupLog.Info("disabling http/2")
+ c.NextProtos = []string{"http/1.1"}
+ }
+
+ if !enableHTTP2 {
+ tlsOpts = append(tlsOpts, disableHTTP2)
+ }
+
+ // Create watchers for metrics and webhooks certificates
+ var metricsCertWatcher, webhookCertWatcher *certwatcher.CertWatcher
+
+ // Initial webhook TLS options
+ webhookTLSOpts := tlsOpts
+
+ if len(webhookCertPath) > 0 {
+ setupLog.Info("Initializing webhook certificate watcher using provided certificates",
+ "webhook-cert-path", webhookCertPath, "webhook-cert-name", webhookCertName, "webhook-cert-key", webhookCertKey)
+
+ var err error
+ webhookCertWatcher, err = certwatcher.New(
+ filepath.Join(webhookCertPath, webhookCertName),
+ filepath.Join(webhookCertPath, webhookCertKey),
+ )
+ if err != nil {
+ setupLog.Error(err, "Failed to initialize webhook certificate watcher")
+ os.Exit(1)
+ }
+
+ webhookTLSOpts = append(webhookTLSOpts, func(config *tls.Config) {
+ config.GetCertificate = webhookCertWatcher.GetCertificate
+ })
+ }
+
+ webhookServer := webhook.NewServer(webhook.Options{
+ TLSOpts: webhookTLSOpts,
+ })
+
+ // Metrics endpoint is enabled in 'config/default/kustomization.yaml'. The Metrics options configure the server.
+ // More info:
+ // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.21.0/pkg/metrics/server
+ // - https://book.kubebuilder.io/reference/metrics.html
+ metricsServerOptions := metricsserver.Options{
+ BindAddress: metricsAddr,
+ SecureServing: secureMetrics,
+ TLSOpts: tlsOpts,
+ }
+
+ if secureMetrics {
+ // FilterProvider is used to protect the metrics endpoint with authn/authz.
+ // These configurations ensure that only authorized users and service accounts
+ // can access the metrics endpoint. The RBAC rules are configured in 'config/rbac/kustomization.yaml'. More info:
+ // https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.21.0/pkg/metrics/filters#WithAuthenticationAndAuthorization
+ metricsServerOptions.FilterProvider = filters.WithAuthenticationAndAuthorization
+ }
+
+ // If the certificate is not specified, controller-runtime will automatically
+ // generate self-signed certificates for the metrics server. While convenient for development and testing,
+ // this setup is not recommended for production.
+ //
+ // If you enable certManager, uncomment the following lines:
+ // - [METRICS-WITH-CERTS] at config/default/kustomization.yaml to generate and use certificates
+ // managed by cert-manager for the metrics server.
+ // - [PROMETHEUS-WITH-CERTS] at config/prometheus/kustomization.yaml for TLS verification.
+ if len(metricsCertPath) > 0 {
+ setupLog.Info("Initializing metrics certificate watcher using provided certificates",
+ "metrics-cert-path", metricsCertPath, "metrics-cert-name", metricsCertName, "metrics-cert-key", metricsCertKey)
+
+ var err error
+ metricsCertWatcher, err = certwatcher.New(
+ filepath.Join(metricsCertPath, metricsCertName),
+ filepath.Join(metricsCertPath, metricsCertKey),
+ )
+ if err != nil {
+ setupLog.Error(err, "to initialize metrics certificate watcher", "error", err)
+ os.Exit(1)
+ }
+
+ metricsServerOptions.TLSOpts = append(metricsServerOptions.TLSOpts, func(config *tls.Config) {
+ config.GetCertificate = metricsCertWatcher.GetCertificate
+ })
+ }
+
+ mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
+ Scheme: scheme,
+ Metrics: metricsServerOptions,
+ WebhookServer: webhookServer,
+ HealthProbeBindAddress: probeAddr,
+ LeaderElection: enableLeaderElection,
+ LeaderElectionID: "6fb26449.cortex",
+ // LeaderElectionReleaseOnCancel defines whether the leader should step down voluntarily
+ // when the Manager ends. This requires the binary to exit immediately when the
+ // Manager is stopped; otherwise, this setting is unsafe. Enabling it significantly
+ // speeds up voluntary leader transitions, as the new leader doesn't have to wait
+ // the LeaseDuration first.
+ //
+ // In the default scaffold provided, the program ends immediately after
+ // the manager stops, so it would be fine to enable this option. However,
+ // if you are doing, or intend to do, any operation such as performing cleanups
+ // after the manager stops, then its usage might be unsafe.
+ // LeaderElectionReleaseOnCancel: true,
+ })
+ if err != nil {
+ setupLog.Error(err, "unable to start manager")
+ os.Exit(1)
+ }
+
+ if err := (&controller.SchedulingDecisionReconciler{
+ Client: mgr.GetClient(),
+ Scheme: mgr.GetScheme(),
+ Conf: conf.NewConfig[controller.Config](),
+ }).SetupWithManager(mgr); err != nil {
+ setupLog.Error(err, "unable to create controller", "controller", "SchedulingDecision")
+ os.Exit(1)
+ }
+
+ if err := (&controller.SchedulingDecisionTTLController{
+ Client: mgr.GetClient(),
+ Scheme: mgr.GetScheme(),
+ Conf: conf.NewConfig[controller.Config](),
+ }).SetupWithManager(mgr); err != nil {
+ setupLog.Error(err, "unable to create controller", "controller", "SchedulingDecisionTTL")
+ os.Exit(1)
+ }
+ // +kubebuilder:scaffold:builder
+
+ if metricsCertWatcher != nil {
+ setupLog.Info("Adding metrics certificate watcher to manager")
+ if err := mgr.Add(metricsCertWatcher); err != nil {
+ setupLog.Error(err, "unable to add metrics certificate watcher to manager")
+ os.Exit(1)
+ }
+ }
+
+ if webhookCertWatcher != nil {
+ setupLog.Info("Adding webhook certificate watcher to manager")
+ if err := mgr.Add(webhookCertWatcher); err != nil {
+ setupLog.Error(err, "unable to add webhook certificate watcher to manager")
+ os.Exit(1)
+ }
+ }
+
+ if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
+ setupLog.Error(err, "unable to set up health check")
+ os.Exit(1)
+ }
+ if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
+ setupLog.Error(err, "unable to set up ready check")
+ os.Exit(1)
+ }
+
+ setupLog.Info("starting manager")
+ if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
+ setupLog.Error(err, "problem running manager")
+ os.Exit(1)
+ }
+}
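
The `tlsOpts` slice built here is the idiomatic controller-runtime pattern: every `func(*tls.Config)` is applied, in order, to each server's own TLS config, so a single option such as `disableHTTP2` can be shared between the webhook and metrics servers. A self-contained sketch of how such options compose:

```go
package main

import (
	"crypto/tls"
	"fmt"
)

// applyTLSOpts mirrors how the webhook and metrics servers consume
// their TLSOpts: each function mutates the server's own *tls.Config.
func applyTLSOpts(opts []func(*tls.Config)) *tls.Config {
	cfg := &tls.Config{MinVersion: tls.VersionTLS12}
	for _, opt := range opts {
		opt(cfg)
	}
	return cfg
}

func main() {
	var tlsOpts []func(*tls.Config)

	// Same shape as the disableHTTP2 closure in main.go: restricting
	// ALPN to http/1.1 keeps HTTP/2 (and its Rapid Reset exposure) off.
	tlsOpts = append(tlsOpts, func(c *tls.Config) {
		c.NextProtos = []string{"http/1.1"}
	})

	fmt.Println(applyTLSOpts(tlsOpts).NextProtos) // [http/1.1]
}
```
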
diff --git a/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml b/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml
new file mode 100644
index 00000000..64061bcb
--- /dev/null
+++ b/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml
@@ -0,0 +1,171 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ annotations:
+ controller-gen.kubebuilder.io/version: v0.17.2
+ name: schedulingdecisions.decisions.cortex
+spec:
+ group: decisions.cortex
+ names:
+ kind: SchedulingDecision
+ listKind: SchedulingDecisionList
+ plural: schedulingdecisions
+ shortNames:
+ - sdec
+ - sdecs
+ singular: schedulingdecision
+ scope: Cluster
+ versions:
+ - additionalPrinterColumns:
+ - jsonPath: .status.state
+ name: State
+ type: string
+ - jsonPath: .status.error
+ name: Error
+ type: string
+ - jsonPath: .metadata.creationTimestamp
+ name: Created
+ type: date
+ - jsonPath: .status.decisionCount
+ name: Decisions
+ type: integer
+ - jsonPath: .spec.decisions[-1].eventType
+ name: Latest Event
+ type: string
+ - jsonPath: .status.globalDescription
+ name: Description
+ type: string
+ name: v1alpha1
+ schema:
+ openAPIV3Schema:
+ description: SchedulingDecision is the Schema for the schedulingdecisions
+ API
+ properties:
+ apiVersion:
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+ type: string
+ kind:
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ metadata:
+ type: object
+ spec:
+ description: spec defines the desired state of SchedulingDecision
+ properties:
+ decisions:
+ items:
+ properties:
+ availabilityZone:
+ type: string
+ eventType:
+ type: string
+ flavor:
+ properties:
+ name:
+ type: string
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ type: object
+ required:
+ - name
+ type: object
+ id:
+ type: string
+ input:
+ additionalProperties:
+ type: number
+ type: object
+ pipeline:
+ properties:
+ name:
+ type: string
+ outputs:
+ items:
+ properties:
+ activations:
+ additionalProperties:
+ type: number
+ type: object
+ step:
+ type: string
+ required:
+ - step
+ type: object
+ type: array
+ required:
+ - name
+ type: object
+ requestedAt:
+ format: date-time
+ type: string
+ required:
+ - eventType
+ - id
+ - pipeline
+ - requestedAt
+ type: object
+ type: array
+ required:
+ - decisions
+ type: object
+ status:
+ description: status defines the observed state of SchedulingDecision
+ properties:
+ decisionCount:
+ type: integer
+ error:
+ type: string
+ globalDescription:
+ type: string
+ results:
+ items:
+ description: SchedulingDecisionResult represents the result of processing
+ a single decision request.
+ properties:
+ deletedHosts:
+ additionalProperties:
+ items:
+ type: string
+ type: array
+ description: Hosts that were deleted during pipeline processing
+ and all steps that attempted to delete them.
+ type: object
+ description:
+ type: string
+ finalScores:
+ additionalProperties:
+ type: number
+ description: Final scores for each host after processing all
+ pipeline steps.
+ type: object
+ id:
+ type: string
+ required:
+ - id
+ type: object
+ type: array
+ state:
+ type: string
+ type: object
+ required:
+ - spec
+ type: object
+ served: true
+ storage: true
+ subresources:
+ status: {}
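
For orientation, a hedged sketch of a minimal object satisfying this schema, written against the v1alpha1 Go types. The Go field names (`ID`, `EventType`, `RequestedAt`, `Pipeline`) are inferred from the JSON properties and the deepcopy code, and the event-type value is hypothetical; the optional `flavor` is left at its zero value. Note the resource is cluster-scoped, so no namespace is set:

```go
package main

import (
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	decisionsv1alpha1 "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1"
)

// newDemoDecision fills exactly the fields the CRD marks as required:
// decisions[].eventType, id, pipeline (with name), and requestedAt.
func newDemoDecision() *decisionsv1alpha1.SchedulingDecision {
	return &decisionsv1alpha1.SchedulingDecision{
		ObjectMeta: metav1.ObjectMeta{Name: "demo-decision"},
		Spec: decisionsv1alpha1.SchedulingDecisionSpec{
			Decisions: []decisionsv1alpha1.SchedulingDecisionRequest{{
				ID:          "req-1",
				EventType:   "initial-placement", // hypothetical value
				RequestedAt: metav1.Now(),
				Input:       map[string]float64{"host-a": 0.5, "host-b": 0.8},
				Pipeline: decisionsv1alpha1.SchedulingDecisionPipelineSpec{
					Name: "default",
				},
			}},
		},
	}
}

func main() {
	fmt.Println(newDemoDecision().Name) // demo-decision
}
```
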
diff --git a/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml b/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml
new file mode 100644
index 00000000..64061bcb
--- /dev/null
+++ b/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml
@@ -0,0 +1,171 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ annotations:
+ controller-gen.kubebuilder.io/version: v0.17.2
+ name: schedulingdecisions.decisions.cortex
+spec:
+ group: decisions.cortex
+ names:
+ kind: SchedulingDecision
+ listKind: SchedulingDecisionList
+ plural: schedulingdecisions
+ shortNames:
+ - sdec
+ - sdecs
+ singular: schedulingdecision
+ scope: Cluster
+ versions:
+ - additionalPrinterColumns:
+ - jsonPath: .status.state
+ name: State
+ type: string
+ - jsonPath: .status.error
+ name: Error
+ type: string
+ - jsonPath: .metadata.creationTimestamp
+ name: Created
+ type: date
+ - jsonPath: .status.decisionCount
+ name: Decisions
+ type: integer
+ - jsonPath: .spec.decisions[-1].eventType
+ name: Latest Event
+ type: string
+ - jsonPath: .status.globalDescription
+ name: Description
+ type: string
+ name: v1alpha1
+ schema:
+ openAPIV3Schema:
+ description: SchedulingDecision is the Schema for the schedulingdecisions
+ API
+ properties:
+ apiVersion:
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+ type: string
+ kind:
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ metadata:
+ type: object
+ spec:
+ description: spec defines the desired state of SchedulingDecision
+ properties:
+ decisions:
+ items:
+ properties:
+ availabilityZone:
+ type: string
+ eventType:
+ type: string
+ flavor:
+ properties:
+ name:
+ type: string
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ type: object
+ required:
+ - name
+ type: object
+ id:
+ type: string
+ input:
+ additionalProperties:
+ type: number
+ type: object
+ pipeline:
+ properties:
+ name:
+ type: string
+ outputs:
+ items:
+ properties:
+ activations:
+ additionalProperties:
+ type: number
+ type: object
+ step:
+ type: string
+ required:
+ - step
+ type: object
+ type: array
+ required:
+ - name
+ type: object
+ requestedAt:
+ format: date-time
+ type: string
+ required:
+ - eventType
+ - id
+ - pipeline
+ - requestedAt
+ type: object
+ type: array
+ required:
+ - decisions
+ type: object
+ status:
+ description: status defines the observed state of SchedulingDecision
+ properties:
+ decisionCount:
+ type: integer
+ error:
+ type: string
+ globalDescription:
+ type: string
+ results:
+ items:
+ description: SchedulingDecisionResult represents the result of processing
+ a single decision request.
+ properties:
+ deletedHosts:
+ additionalProperties:
+ items:
+ type: string
+ type: array
+ description: Hosts that were deleted during pipeline processing
+ and all steps that attempted to delete them.
+ type: object
+ description:
+ type: string
+ finalScores:
+ additionalProperties:
+ type: number
+ description: Final scores for each host after processing all
+ pipeline steps.
+ type: object
+ id:
+ type: string
+ required:
+ - id
+ type: object
+ type: array
+ state:
+ type: string
+ type: object
+ required:
+ - spec
+ type: object
+ served: true
+ storage: true
+ subresources:
+ status: {}
diff --git a/decisions/config/crd/kustomization.yaml b/decisions/config/crd/kustomization.yaml
new file mode 100644
index 00000000..c1caafe2
--- /dev/null
+++ b/decisions/config/crd/kustomization.yaml
@@ -0,0 +1,16 @@
+# This kustomization.yaml is not intended to be run by itself,
+# since it depends on service name and namespace that are out of this kustomize package.
+# It should be run by config/default
+resources:
+- bases/decisions.cortex_schedulingdecisions.yaml
+# +kubebuilder:scaffold:crdkustomizeresource
+
+patches:
+# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix.
+# patches here are for enabling the conversion webhook for each CRD
+# +kubebuilder:scaffold:crdkustomizewebhookpatch
+
+# [WEBHOOK] To enable webhook, uncomment the following section
+# the following config is for teaching kustomize how to do kustomization for CRDs.
+#configurations:
+#- kustomizeconfig.yaml
diff --git a/decisions/config/crd/kustomizeconfig.yaml b/decisions/config/crd/kustomizeconfig.yaml
new file mode 100644
index 00000000..ec5c150a
--- /dev/null
+++ b/decisions/config/crd/kustomizeconfig.yaml
@@ -0,0 +1,19 @@
+# This file teaches kustomize how to substitute name and namespace references in CRDs
+nameReference:
+- kind: Service
+ version: v1
+ fieldSpecs:
+ - kind: CustomResourceDefinition
+ version: v1
+ group: apiextensions.k8s.io
+ path: spec/conversion/webhook/clientConfig/service/name
+
+namespace:
+- kind: CustomResourceDefinition
+ version: v1
+ group: apiextensions.k8s.io
+ path: spec/conversion/webhook/clientConfig/service/namespace
+ create: false
+
+varReference:
+- path: metadata/annotations
diff --git a/decisions/config/default/cert_metrics_manager_patch.yaml b/decisions/config/default/cert_metrics_manager_patch.yaml
new file mode 100644
index 00000000..d9750155
--- /dev/null
+++ b/decisions/config/default/cert_metrics_manager_patch.yaml
@@ -0,0 +1,30 @@
+# This patch adds the args, volume mounts, and volumes that allow the manager to use the metrics-server certs.
+
+# Add the volumeMount for the metrics-server certs
+- op: add
+ path: /spec/template/spec/containers/0/volumeMounts/-
+ value:
+ mountPath: /tmp/k8s-metrics-server/metrics-certs
+ name: metrics-certs
+ readOnly: true
+
+# Add the --metrics-cert-path argument for the metrics server
+- op: add
+ path: /spec/template/spec/containers/0/args/-
+ value: --metrics-cert-path=/tmp/k8s-metrics-server/metrics-certs
+
+# Add the metrics-server certs volume configuration
+- op: add
+ path: /spec/template/spec/volumes/-
+ value:
+ name: metrics-certs
+ secret:
+ secretName: metrics-server-cert
+ optional: false
+ items:
+ - key: ca.crt
+ path: ca.crt
+ - key: tls.crt
+ path: tls.crt
+ - key: tls.key
+ path: tls.key
diff --git a/decisions/config/default/kustomization.yaml b/decisions/config/default/kustomization.yaml
new file mode 100644
index 00000000..35afcf3b
--- /dev/null
+++ b/decisions/config/default/kustomization.yaml
@@ -0,0 +1,234 @@
+# Adds namespace to all resources.
+namespace: cortex-decisions
+
+# Value of this field is prepended to the
+# names of all resources, e.g. a deployment named
+# "wordpress" becomes "alices-wordpress".
+# Note that it should also match the prefix (the text before '-') of the namespace
+# field above.
+namePrefix: cortex-decisions-
+
+# Labels to add to all resources and selectors.
+#labels:
+#- includeSelectors: true
+# pairs:
+# someName: someValue
+
+resources:
+- ../crd
+- ../rbac
+- ../manager
+# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in
+# crd/kustomization.yaml
+#- ../webhook
+# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required.
+#- ../certmanager
+# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'.
+#- ../prometheus
+# [METRICS] Expose the controller manager metrics service.
+- metrics_service.yaml
+# [NETWORK POLICY] Protect the /metrics endpoint and Webhook Server with NetworkPolicy.
+# Only Pod(s) running in a namespace labeled with 'metrics: enabled' will be able to gather the metrics.
+# Only CR(s) which require webhooks and are applied in namespaces labeled with 'webhooks: enabled' will
+# be able to communicate with the Webhook Server.
+#- ../network-policy
+
+# Uncomment the patches line if you enable Metrics
+patches:
+# [METRICS] The following patch will enable the metrics endpoint using HTTPS and the port :8443.
+# More info: https://book.kubebuilder.io/reference/metrics
+- path: manager_metrics_patch.yaml
+ target:
+ kind: Deployment
+
+# Uncomment the patches line if you enable Metrics and CertManager
+# [METRICS-WITH-CERTS] To enable metrics protected with certManager, uncomment the following line.
+# This patch will protect the metrics with certManager self-signed certs.
+#- path: cert_metrics_manager_patch.yaml
+# target:
+# kind: Deployment
+
+# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in
+# crd/kustomization.yaml
+#- path: manager_webhook_patch.yaml
+# target:
+# kind: Deployment
+
+# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix.
+# Uncomment the following replacements to add the cert-manager CA injection annotations
+#replacements:
+# - source: # Uncomment the following block to enable certificates for metrics
+# kind: Service
+# version: v1
+# name: controller-manager-metrics-service
+# fieldPath: metadata.name
+# targets:
+# - select:
+# kind: Certificate
+# group: cert-manager.io
+# version: v1
+# name: metrics-certs
+# fieldPaths:
+# - spec.dnsNames.0
+# - spec.dnsNames.1
+# options:
+# delimiter: '.'
+# index: 0
+# create: true
+# - select: # Uncomment the following to set the Service name for TLS config in Prometheus ServiceMonitor
+# kind: ServiceMonitor
+# group: monitoring.coreos.com
+# version: v1
+# name: controller-manager-metrics-monitor
+# fieldPaths:
+# - spec.endpoints.0.tlsConfig.serverName
+# options:
+# delimiter: '.'
+# index: 0
+# create: true
+
+# - source:
+# kind: Service
+# version: v1
+# name: controller-manager-metrics-service
+# fieldPath: metadata.namespace
+# targets:
+# - select:
+# kind: Certificate
+# group: cert-manager.io
+# version: v1
+# name: metrics-certs
+# fieldPaths:
+# - spec.dnsNames.0
+# - spec.dnsNames.1
+# options:
+# delimiter: '.'
+# index: 1
+# create: true
+# - select: # Uncomment the following to set the Service namespace for TLS in Prometheus ServiceMonitor
+# kind: ServiceMonitor
+# group: monitoring.coreos.com
+# version: v1
+# name: controller-manager-metrics-monitor
+# fieldPaths:
+# - spec.endpoints.0.tlsConfig.serverName
+# options:
+# delimiter: '.'
+# index: 1
+# create: true
+
+# - source: # Uncomment the following block if you have any webhook
+# kind: Service
+# version: v1
+# name: webhook-service
+# fieldPath: .metadata.name # Name of the service
+# targets:
+# - select:
+# kind: Certificate
+# group: cert-manager.io
+# version: v1
+# name: serving-cert
+# fieldPaths:
+# - .spec.dnsNames.0
+# - .spec.dnsNames.1
+# options:
+# delimiter: '.'
+# index: 0
+# create: true
+# - source:
+# kind: Service
+# version: v1
+# name: webhook-service
+# fieldPath: .metadata.namespace # Namespace of the service
+# targets:
+# - select:
+# kind: Certificate
+# group: cert-manager.io
+# version: v1
+# name: serving-cert
+# fieldPaths:
+# - .spec.dnsNames.0
+# - .spec.dnsNames.1
+# options:
+# delimiter: '.'
+# index: 1
+# create: true
+
+# - source: # Uncomment the following block if you have a ValidatingWebhook (--programmatic-validation)
+# kind: Certificate
+# group: cert-manager.io
+# version: v1
+# name: serving-cert # This name should match the one in certificate.yaml
+# fieldPath: .metadata.namespace # Namespace of the certificate CR
+# targets:
+# - select:
+# kind: ValidatingWebhookConfiguration
+# fieldPaths:
+# - .metadata.annotations.[cert-manager.io/inject-ca-from]
+# options:
+# delimiter: '/'
+# index: 0
+# create: true
+# - source:
+# kind: Certificate
+# group: cert-manager.io
+# version: v1
+# name: serving-cert
+# fieldPath: .metadata.name
+# targets:
+# - select:
+# kind: ValidatingWebhookConfiguration
+# fieldPaths:
+# - .metadata.annotations.[cert-manager.io/inject-ca-from]
+# options:
+# delimiter: '/'
+# index: 1
+# create: true
+
+# - source: # Uncomment the following block if you have a DefaultingWebhook (--defaulting )
+# kind: Certificate
+# group: cert-manager.io
+# version: v1
+# name: serving-cert
+# fieldPath: .metadata.namespace # Namespace of the certificate CR
+# targets:
+# - select:
+# kind: MutatingWebhookConfiguration
+# fieldPaths:
+# - .metadata.annotations.[cert-manager.io/inject-ca-from]
+# options:
+# delimiter: '/'
+# index: 0
+# create: true
+# - source:
+# kind: Certificate
+# group: cert-manager.io
+# version: v1
+# name: serving-cert
+# fieldPath: .metadata.name
+# targets:
+# - select:
+# kind: MutatingWebhookConfiguration
+# fieldPaths:
+# - .metadata.annotations.[cert-manager.io/inject-ca-from]
+# options:
+# delimiter: '/'
+# index: 1
+# create: true
+
+# - source: # Uncomment the following block if you have a ConversionWebhook (--conversion)
+# kind: Certificate
+# group: cert-manager.io
+# version: v1
+# name: serving-cert
+# fieldPath: .metadata.namespace # Namespace of the certificate CR
+# targets: # Do not remove or uncomment the following scaffold marker; required to generate code for target CRD.
+# +kubebuilder:scaffold:crdkustomizecainjectionns
+# - source:
+# kind: Certificate
+# group: cert-manager.io
+# version: v1
+# name: serving-cert
+# fieldPath: .metadata.name
+# targets: # Do not remove or uncomment the following scaffold marker; required to generate code for target CRD.
+# +kubebuilder:scaffold:crdkustomizecainjectionname
diff --git a/decisions/config/default/manager_metrics_patch.yaml b/decisions/config/default/manager_metrics_patch.yaml
new file mode 100644
index 00000000..2aaef653
--- /dev/null
+++ b/decisions/config/default/manager_metrics_patch.yaml
@@ -0,0 +1,4 @@
+# This patch adds the args to allow exposing the metrics endpoint using HTTPS
+- op: add
+ path: /spec/template/spec/containers/0/args/0
+ value: --metrics-bind-address=:8443
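
This is an RFC 6902 (JSON patch) operation: `add` with path `/args/0` inserts at index 0, prepending the flag rather than overwriting the first existing argument. A quick way to sanity-check that semantics in Go, using the evanphx/json-patch library (to the best of my knowledge the same RFC 6902 implementation kustomize relies on):

```go
package main

import (
	"fmt"

	jsonpatch "github.com/evanphx/json-patch/v5"
)

func main() {
	// The container args as scaffolded in config/manager/manager.yaml.
	doc := []byte(`{"args":["--leader-elect","--health-probe-bind-address=:8081"]}`)

	// The same operation as manager_metrics_patch.yaml, in JSON form.
	patch, err := jsonpatch.DecodePatch([]byte(
		`[{"op":"add","path":"/args/0","value":"--metrics-bind-address=:8443"}]`))
	if err != nil {
		panic(err)
	}

	out, err := patch.Apply(doc)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(out))
	// {"args":["--metrics-bind-address=:8443","--leader-elect","--health-probe-bind-address=:8081"]}
}
```
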
diff --git a/decisions/config/default/metrics_service.yaml b/decisions/config/default/metrics_service.yaml
new file mode 100644
index 00000000..cd559a2e
--- /dev/null
+++ b/decisions/config/default/metrics_service.yaml
@@ -0,0 +1,18 @@
+apiVersion: v1
+kind: Service
+metadata:
+ labels:
+ control-plane: controller-manager
+ app.kubernetes.io/name: decisions
+ app.kubernetes.io/managed-by: kustomize
+ name: controller-manager-metrics-service
+ namespace: system
+spec:
+ ports:
+ - name: https
+ port: 8443
+ protocol: TCP
+ targetPort: 8443
+ selector:
+ control-plane: controller-manager
+ app.kubernetes.io/name: decisions
diff --git a/decisions/config/manager/kustomization.yaml b/decisions/config/manager/kustomization.yaml
new file mode 100644
index 00000000..0df5546d
--- /dev/null
+++ b/decisions/config/manager/kustomization.yaml
@@ -0,0 +1,8 @@
+resources:
+- manager.yaml
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+images:
+- name: controller
+ newName: example.com/decisions
+ newTag: v0.0.1
diff --git a/decisions/config/manager/manager.yaml b/decisions/config/manager/manager.yaml
new file mode 100644
index 00000000..91f9f3a4
--- /dev/null
+++ b/decisions/config/manager/manager.yaml
@@ -0,0 +1,77 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+ labels:
+ control-plane: controller-manager
+ app.kubernetes.io/name: decisions
+ app.kubernetes.io/managed-by: kustomize
+ name: system
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: controller-manager
+ namespace: system
+ labels:
+ control-plane: controller-manager
+ app.kubernetes.io/name: decisions
+ app.kubernetes.io/managed-by: kustomize
+spec:
+ selector:
+ matchLabels:
+ control-plane: controller-manager
+ app.kubernetes.io/name: decisions
+ replicas: 1
+ template:
+ metadata:
+ annotations:
+ kubectl.kubernetes.io/default-container: manager
+ labels:
+ control-plane: controller-manager
+ app.kubernetes.io/name: decisions
+ spec:
+ securityContext:
+ # Projects are configured by default to adhere to the "restricted" Pod Security Standards.
+ # This ensures that deployments meet the highest security requirements for Kubernetes.
+ # For more details, see: https://kubernetes.io/docs/concepts/security/pod-security-standards/#restricted
+ runAsNonRoot: true
+ seccompProfile:
+ type: RuntimeDefault
+ containers:
+ - command:
+ - /manager
+ args:
+ - --leader-elect
+ - --health-probe-bind-address=:8081
+ image: controller:latest
+ name: manager
+ ports: []
+ securityContext:
+ readOnlyRootFilesystem: true
+ allowPrivilegeEscalation: false
+ capabilities:
+ drop:
+ - "ALL"
+ livenessProbe:
+ httpGet:
+ path: /healthz
+ port: 8081
+ initialDelaySeconds: 15
+ periodSeconds: 20
+ readinessProbe:
+ httpGet:
+ path: /readyz
+ port: 8081
+ initialDelaySeconds: 5
+ periodSeconds: 10
+ resources:
+ limits:
+ cpu: 500m
+ memory: 128Mi
+ requests:
+ cpu: 10m
+ memory: 64Mi
+ volumeMounts: []
+ volumes: []
+ serviceAccountName: controller-manager
+ terminationGracePeriodSeconds: 10
diff --git a/decisions/config/network-policy/allow-metrics-traffic.yaml b/decisions/config/network-policy/allow-metrics-traffic.yaml
new file mode 100644
index 00000000..da847f1b
--- /dev/null
+++ b/decisions/config/network-policy/allow-metrics-traffic.yaml
@@ -0,0 +1,27 @@
+# This NetworkPolicy allows ingress traffic from
+# Pods running in namespaces labeled with 'metrics: enabled'. Only Pods in those
+# namespaces are able to gather data from the metrics endpoint.
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+ labels:
+ app.kubernetes.io/name: decisions
+ app.kubernetes.io/managed-by: kustomize
+ name: allow-metrics-traffic
+ namespace: system
+spec:
+ podSelector:
+ matchLabels:
+ control-plane: controller-manager
+ app.kubernetes.io/name: decisions
+ policyTypes:
+ - Ingress
+ ingress:
+ # This allows ingress traffic from any namespace with the label metrics: enabled
+ - from:
+ - namespaceSelector:
+ matchLabels:
+ metrics: enabled # Only from namespaces with this label
+ ports:
+ - port: 8443
+ protocol: TCP
diff --git a/decisions/config/network-policy/kustomization.yaml b/decisions/config/network-policy/kustomization.yaml
new file mode 100644
index 00000000..ec0fb5e5
--- /dev/null
+++ b/decisions/config/network-policy/kustomization.yaml
@@ -0,0 +1,2 @@
+resources:
+- allow-metrics-traffic.yaml
diff --git a/decisions/config/prometheus/kustomization.yaml b/decisions/config/prometheus/kustomization.yaml
new file mode 100644
index 00000000..fdc5481b
--- /dev/null
+++ b/decisions/config/prometheus/kustomization.yaml
@@ -0,0 +1,11 @@
+resources:
+- monitor.yaml
+
+# [PROMETHEUS-WITH-CERTS] The following patch configures the ServiceMonitor in ../prometheus
+# to securely reference certificates created and managed by cert-manager.
+# Additionally, ensure that you uncomment the [METRICS-WITH-CERTS] patch under config/default/kustomization.yaml
+# to mount the "metrics-server-cert" secret in the Manager Deployment.
+#patches:
+# - path: monitor_tls_patch.yaml
+# target:
+# kind: ServiceMonitor
diff --git a/decisions/config/prometheus/monitor.yaml b/decisions/config/prometheus/monitor.yaml
new file mode 100644
index 00000000..bf0a107e
--- /dev/null
+++ b/decisions/config/prometheus/monitor.yaml
@@ -0,0 +1,27 @@
+# Prometheus Monitor Service (Metrics)
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+ labels:
+ control-plane: controller-manager
+ app.kubernetes.io/name: decisions
+ app.kubernetes.io/managed-by: kustomize
+ name: controller-manager-metrics-monitor
+ namespace: system
+spec:
+ endpoints:
+ - path: /metrics
+ port: https # Ensure this is the name of the port that exposes HTTPS metrics
+ scheme: https
+ bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
+ tlsConfig:
+ # The option insecureSkipVerify: true is not recommended for production since it disables
+ # certificate verification, exposing the system to potential man-in-the-middle attacks.
+ # For production environments, it is recommended to use cert-manager for automatic TLS certificate management.
+ # To apply this configuration, enable cert-manager and use the patch located at config/prometheus/monitor_tls_patch.yaml,
+ # which securely references the certificate from the 'metrics-server-cert' secret.
+ insecureSkipVerify: true
+ selector:
+ matchLabels:
+ control-plane: controller-manager
+ app.kubernetes.io/name: decisions
diff --git a/decisions/config/prometheus/monitor_tls_patch.yaml b/decisions/config/prometheus/monitor_tls_patch.yaml
new file mode 100644
index 00000000..5bf84ce0
--- /dev/null
+++ b/decisions/config/prometheus/monitor_tls_patch.yaml
@@ -0,0 +1,19 @@
+# Patch for Prometheus ServiceMonitor to enable secure TLS configuration
+# using certificates managed by cert-manager
+- op: replace
+ path: /spec/endpoints/0/tlsConfig
+ value:
+ # SERVICE_NAME and SERVICE_NAMESPACE will be substituted by kustomize
+ serverName: SERVICE_NAME.SERVICE_NAMESPACE.svc
+ insecureSkipVerify: false
+ ca:
+ secret:
+ name: metrics-server-cert
+ key: ca.crt
+ cert:
+ secret:
+ name: metrics-server-cert
+ key: tls.crt
+ keySecret:
+ name: metrics-server-cert
+ key: tls.key
diff --git a/decisions/config/rbac/kustomization.yaml b/decisions/config/rbac/kustomization.yaml
new file mode 100644
index 00000000..9a3976b9
--- /dev/null
+++ b/decisions/config/rbac/kustomization.yaml
@@ -0,0 +1,28 @@
+resources:
+# All RBAC will be applied under this service account in
+# the deployment namespace. You may comment out this resource
+# if your manager will use a service account that exists at
+# runtime. Be sure to update RoleBinding and ClusterRoleBinding
+# subjects if changing service account names.
+- service_account.yaml
+- role.yaml
+- role_binding.yaml
+- leader_election_role.yaml
+- leader_election_role_binding.yaml
+# The following RBAC configurations are used to protect
+# the metrics endpoint with authn/authz. These configurations
+# ensure that only authorized users and service accounts
+# can access the metrics endpoint. Comment the following
+# permissions if you want to disable this protection.
+# More info: https://book.kubebuilder.io/reference/metrics.html
+- metrics_auth_role.yaml
+- metrics_auth_role_binding.yaml
+- metrics_reader_role.yaml
+# For each CRD, "Admin", "Editor" and "Viewer" roles are scaffolded by
+# default, aiding admins in cluster management. Those roles are
+# not used by the decisions itself. You can comment the following lines
+# if you do not want those helpers be installed with your Project.
+- schedulingdecision_admin_role.yaml
+- schedulingdecision_editor_role.yaml
+- schedulingdecision_viewer_role.yaml
+
diff --git a/decisions/config/rbac/leader_election_role.yaml b/decisions/config/rbac/leader_election_role.yaml
new file mode 100644
index 00000000..3f1f68b5
--- /dev/null
+++ b/decisions/config/rbac/leader_election_role.yaml
@@ -0,0 +1,40 @@
+# permissions to do leader election.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+ labels:
+ app.kubernetes.io/name: decisions
+ app.kubernetes.io/managed-by: kustomize
+ name: leader-election-role
+rules:
+- apiGroups:
+ - ""
+ resources:
+ - configmaps
+ verbs:
+ - get
+ - list
+ - watch
+ - create
+ - update
+ - patch
+ - delete
+- apiGroups:
+ - coordination.k8s.io
+ resources:
+ - leases
+ verbs:
+ - get
+ - list
+ - watch
+ - create
+ - update
+ - patch
+ - delete
+- apiGroups:
+ - ""
+ resources:
+ - events
+ verbs:
+ - create
+ - patch
diff --git a/decisions/config/rbac/leader_election_role_binding.yaml b/decisions/config/rbac/leader_election_role_binding.yaml
new file mode 100644
index 00000000..1f6f5652
--- /dev/null
+++ b/decisions/config/rbac/leader_election_role_binding.yaml
@@ -0,0 +1,15 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+ labels:
+ app.kubernetes.io/name: decisions
+ app.kubernetes.io/managed-by: kustomize
+ name: leader-election-rolebinding
+roleRef:
+ apiGroup: rbac.authorization.k8s.io
+ kind: Role
+ name: leader-election-role
+subjects:
+- kind: ServiceAccount
+ name: controller-manager
+ namespace: system
diff --git a/decisions/config/rbac/metrics_auth_role.yaml b/decisions/config/rbac/metrics_auth_role.yaml
new file mode 100644
index 00000000..32d2e4ec
--- /dev/null
+++ b/decisions/config/rbac/metrics_auth_role.yaml
@@ -0,0 +1,17 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+ name: metrics-auth-role
+rules:
+- apiGroups:
+ - authentication.k8s.io
+ resources:
+ - tokenreviews
+ verbs:
+ - create
+- apiGroups:
+ - authorization.k8s.io
+ resources:
+ - subjectaccessreviews
+ verbs:
+ - create
diff --git a/decisions/config/rbac/metrics_auth_role_binding.yaml b/decisions/config/rbac/metrics_auth_role_binding.yaml
new file mode 100644
index 00000000..e775d67f
--- /dev/null
+++ b/decisions/config/rbac/metrics_auth_role_binding.yaml
@@ -0,0 +1,12 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+ name: metrics-auth-rolebinding
+roleRef:
+ apiGroup: rbac.authorization.k8s.io
+ kind: ClusterRole
+ name: metrics-auth-role
+subjects:
+- kind: ServiceAccount
+ name: controller-manager
+ namespace: system
diff --git a/decisions/config/rbac/metrics_reader_role.yaml b/decisions/config/rbac/metrics_reader_role.yaml
new file mode 100644
index 00000000..51a75db4
--- /dev/null
+++ b/decisions/config/rbac/metrics_reader_role.yaml
@@ -0,0 +1,9 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+ name: metrics-reader
+rules:
+- nonResourceURLs:
+ - "/metrics"
+ verbs:
+ - get
diff --git a/decisions/config/rbac/role.yaml b/decisions/config/rbac/role.yaml
new file mode 100644
index 00000000..ee66f8a1
--- /dev/null
+++ b/decisions/config/rbac/role.yaml
@@ -0,0 +1,32 @@
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+ name: manager-role
+rules:
+- apiGroups:
+ - decisions.cortex
+ resources:
+ - schedulingdecisions
+ verbs:
+ - create
+ - delete
+ - get
+ - list
+ - patch
+ - update
+ - watch
+- apiGroups:
+ - decisions.cortex
+ resources:
+ - schedulingdecisions/finalizers
+ verbs:
+ - update
+- apiGroups:
+ - decisions.cortex
+ resources:
+ - schedulingdecisions/status
+ verbs:
+ - get
+ - patch
+ - update
diff --git a/decisions/config/rbac/role_binding.yaml b/decisions/config/rbac/role_binding.yaml
new file mode 100644
index 00000000..6a27d9e9
--- /dev/null
+++ b/decisions/config/rbac/role_binding.yaml
@@ -0,0 +1,15 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+ labels:
+ app.kubernetes.io/name: decisions
+ app.kubernetes.io/managed-by: kustomize
+ name: manager-rolebinding
+roleRef:
+ apiGroup: rbac.authorization.k8s.io
+ kind: ClusterRole
+ name: manager-role
+subjects:
+- kind: ServiceAccount
+ name: controller-manager
+ namespace: system
diff --git a/decisions/config/rbac/schedulingdecision_admin_role.yaml b/decisions/config/rbac/schedulingdecision_admin_role.yaml
new file mode 100644
index 00000000..cd8699f9
--- /dev/null
+++ b/decisions/config/rbac/schedulingdecision_admin_role.yaml
@@ -0,0 +1,27 @@
+# This rule is not used by the decisions project itself.
+# It is provided to allow the cluster admin to help manage permissions for users.
+#
+# Grants full permissions ('*') over decisions.cortex.
+# This role is intended for users authorized to modify roles and bindings within the cluster,
+# enabling them to delegate specific permissions to other users or groups as needed.
+
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+ labels:
+ app.kubernetes.io/name: decisions
+ app.kubernetes.io/managed-by: kustomize
+ name: schedulingdecision-admin-role
+rules:
+- apiGroups:
+ - decisions.cortex
+ resources:
+ - schedulingdecisions
+ verbs:
+ - '*'
+- apiGroups:
+ - decisions.cortex
+ resources:
+ - schedulingdecisions/status
+ verbs:
+ - get
diff --git a/decisions/config/rbac/schedulingdecision_editor_role.yaml b/decisions/config/rbac/schedulingdecision_editor_role.yaml
new file mode 100644
index 00000000..864ce9ad
--- /dev/null
+++ b/decisions/config/rbac/schedulingdecision_editor_role.yaml
@@ -0,0 +1,33 @@
+# This rule is not used by the decisions project itself.
+# It is provided to allow the cluster admin to help manage permissions for users.
+#
+# Grants permissions to create, update, and delete resources within the
+# decisions.cortex API group. This role is intended for users who need to
+# manage these resources but should not control RBAC or manage permissions for others.
+
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+ labels:
+ app.kubernetes.io/name: decisions
+ app.kubernetes.io/managed-by: kustomize
+ name: schedulingdecision-editor-role
+rules:
+- apiGroups:
+ - decisions.cortex
+ resources:
+ - schedulingdecisions
+ verbs:
+ - create
+ - delete
+ - get
+ - list
+ - patch
+ - update
+ - watch
+- apiGroups:
+ - decisions.cortex
+ resources:
+ - schedulingdecisions/status
+ verbs:
+ - get
diff --git a/decisions/config/rbac/schedulingdecision_viewer_role.yaml b/decisions/config/rbac/schedulingdecision_viewer_role.yaml
new file mode 100644
index 00000000..4d62565e
--- /dev/null
+++ b/decisions/config/rbac/schedulingdecision_viewer_role.yaml
@@ -0,0 +1,29 @@
+# This rule is not used by the decisions project itself.
+# It is provided to allow the cluster admin to help manage permissions for users.
+#
+# Grants read-only access to decisions.cortex resources.
+# This role is intended for users who need visibility into these resources
+# without permissions to modify them. It is ideal for monitoring purposes and limited-access viewing.
+
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+ labels:
+ app.kubernetes.io/name: decisions
+ app.kubernetes.io/managed-by: kustomize
+ name: schedulingdecision-viewer-role
+rules:
+- apiGroups:
+ - decisions.cortex
+ resources:
+ - schedulingdecisions
+ verbs:
+ - get
+ - list
+ - watch
+- apiGroups:
+ - decisions.cortex
+ resources:
+ - schedulingdecisions/status
+ verbs:
+ - get
diff --git a/decisions/config/rbac/service_account.yaml b/decisions/config/rbac/service_account.yaml
new file mode 100644
index 00000000..1adb8bd8
--- /dev/null
+++ b/decisions/config/rbac/service_account.yaml
@@ -0,0 +1,8 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+ labels:
+ app.kubernetes.io/name: decisions
+ app.kubernetes.io/managed-by: kustomize
+ name: controller-manager
+ namespace: system
diff --git a/decisions/dist/chart/.helmignore b/decisions/dist/chart/.helmignore
new file mode 100644
index 00000000..7d92f7fb
--- /dev/null
+++ b/decisions/dist/chart/.helmignore
@@ -0,0 +1,25 @@
+# Patterns to ignore when building Helm packages.
+# Operating system files
+.DS_Store
+
+# Version control directories
+.git/
+.gitignore
+.bzr/
+.hg/
+.hgignore
+.svn/
+
+# Backup and temporary files
+*.swp
+*.tmp
+*.bak
+*.orig
+*~
+
+# IDE and editor-related files
+.idea/
+.vscode/
+
+# Helm chart artifacts
+dist/chart/*.tgz
diff --git a/decisions/dist/chart/Chart.lock b/decisions/dist/chart/Chart.lock
new file mode 100644
index 00000000..db4c5823
--- /dev/null
+++ b/decisions/dist/chart/Chart.lock
@@ -0,0 +1,6 @@
+dependencies:
+- name: owner-info
+ repository: oci://ghcr.io/sapcc/helm-charts
+ version: 1.0.0
+digest: sha256:7643f231cc4ebda347fd12ec62fe4445c280e2b71d27eec555f3025290f5038f
+generated: "2025-08-26T10:55:05.888651+02:00"
diff --git a/decisions/dist/chart/Chart.yaml b/decisions/dist/chart/Chart.yaml
new file mode 100644
index 00000000..caab06fb
--- /dev/null
+++ b/decisions/dist/chart/Chart.yaml
@@ -0,0 +1,14 @@
+apiVersion: v2
+name: cortex-decisions
+description: A Helm chart to distribute the cortex decisions operator.
+type: application
+version: 0.2.0
+appVersion: "latest"
+icon: "https://example.com/icon.png"
+dependencies:
+ # Owner info adds a configmap to the kubernetes cluster with information on
+ # the service owner. This makes it easier to find out who to contact in case
+ # of issues. See: https://github.com/sapcc/helm-charts/pkgs/container/helm-charts%2Fowner-info
+ - name: owner-info
+ repository: oci://ghcr.io/sapcc/helm-charts
+ version: 1.0.0
diff --git a/decisions/dist/chart/charts/owner-info-1.0.0.tgz b/decisions/dist/chart/charts/owner-info-1.0.0.tgz
new file mode 100644
index 00000000..2032ead9
Binary files /dev/null and b/decisions/dist/chart/charts/owner-info-1.0.0.tgz differ
diff --git a/decisions/dist/chart/templates/_helpers.tpl b/decisions/dist/chart/templates/_helpers.tpl
new file mode 100644
index 00000000..05ce24c6
--- /dev/null
+++ b/decisions/dist/chart/templates/_helpers.tpl
@@ -0,0 +1,50 @@
+{{- define "chart.name" -}}
+{{- if .Chart }}
+ {{- if .Chart.Name }}
+ {{- .Chart.Name | trunc 63 | trimSuffix "-" }}
+ {{- else if .Values.nameOverride }}
+ {{- .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+ {{- else }}
+ decisions
+ {{- end }}
+{{- else }}
+ decisions
+{{- end }}
+{{- end }}
+
+
+{{- define "chart.labels" -}}
+{{- if .Chart.AppVersion -}}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+{{- if .Chart.Version }}
+helm.sh/chart: {{ .Chart.Version | quote }}
+{{- end }}
+app.kubernetes.io/name: {{ include "chart.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+
+{{- define "chart.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "chart.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+
+{{- define "chart.hasMutatingWebhooks" -}}
+{{- $hasMutating := false }}
+{{- range . }}
+ {{- if eq .type "mutating" }}
+ $hasMutating = true }}{{- end }}
+{{- end }}
+{{ $hasMutating }}}}{{- end }}
+
+
+{{- define "chart.hasValidatingWebhooks" -}}
+{{- $hasValidating := false }}
+{{- range . }}
+ {{- if eq .type "validating" }}
+ $hasValidating = true }}{{- end }}
+{{- end }}
+{{ $hasValidating }}}}{{- end }}
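
The helpers above hinge on a Go-template scoping subtlety: `=` reassigns the `$has…` variable declared before the `range`, whereas `:=` would declare a fresh variable scoped to the loop body, and the helper would always report false. A standalone Go sketch of the same semantics using text/template (which Helm builds on):

```go
package main

import (
	"os"
	"text/template"
)

func main() {
	// '=' updates the outer $found; ':=' inside the range would shadow
	// it, and the final {{ $found }} would always print false.
	const tpl = `{{- $found := false -}}
{{- range . -}}
  {{- if eq .Type "mutating" -}}{{- $found = true -}}{{- end -}}
{{- end -}}
{{ $found }}`

	hooks := []struct{ Type string }{{Type: "validating"}, {Type: "mutating"}}
	t := template.Must(template.New("hasMutating").Parse(tpl))
	_ = t.Execute(os.Stdout, hooks) // prints: true
}
```
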
diff --git a/decisions/dist/chart/templates/certmanager/certificate.yaml b/decisions/dist/chart/templates/certmanager/certificate.yaml
new file mode 100644
index 00000000..b1b42606
--- /dev/null
+++ b/decisions/dist/chart/templates/certmanager/certificate.yaml
@@ -0,0 +1,36 @@
+{{- if .Values.certmanager.enable }}
+# Self-signed Issuer
+apiVersion: cert-manager.io/v1
+kind: Issuer
+metadata:
+ labels:
+ {{- include "chart.labels" . | nindent 4 }}
+ name: selfsigned-issuer
+ namespace: {{ .Release.Namespace }}
+spec:
+ selfSigned: {}
+{{- if .Values.metrics.enable }}
+---
+# Certificate for the metrics
+apiVersion: cert-manager.io/v1
+kind: Certificate
+metadata:
+ annotations:
+ {{- if .Values.crd.keep }}
+ "helm.sh/resource-policy": keep
+ {{- end }}
+ labels:
+ {{- include "chart.labels" . | nindent 4 }}
+ name: metrics-certs
+ namespace: {{ .Release.Namespace }}
+spec:
+ dnsNames:
+ - decisions.{{ .Release.Namespace }}.svc
+ - decisions.{{ .Release.Namespace }}.svc.cluster.local
+ - decisions-controller-manager-metrics-service.{{ .Release.Namespace }}.svc
+ issuerRef:
+ kind: Issuer
+ name: selfsigned-issuer
+ secretName: metrics-server-cert
+{{- end }}
+{{- end }}
diff --git a/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml b/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml
new file mode 100644
index 00000000..2cea3946
--- /dev/null
+++ b/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml
@@ -0,0 +1,178 @@
+{{- if .Values.crd.enable }}
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ labels:
+ {{- include "chart.labels" . | nindent 4 }}
+ annotations:
+ {{- if .Values.crd.keep }}
+ "helm.sh/resource-policy": keep
+ {{- end }}
+ controller-gen.kubebuilder.io/version: v0.17.2
+ name: schedulingdecisions.decisions.cortex
+spec:
+ group: decisions.cortex
+ names:
+ kind: SchedulingDecision
+ listKind: SchedulingDecisionList
+ plural: schedulingdecisions
+ shortNames:
+ - sdec
+ - sdecs
+ singular: schedulingdecision
+ scope: Cluster
+ versions:
+ - additionalPrinterColumns:
+ - jsonPath: .status.state
+ name: State
+ type: string
+ - jsonPath: .status.error
+ name: Error
+ type: string
+ - jsonPath: .metadata.creationTimestamp
+ name: Created
+ type: date
+ - jsonPath: .status.decisionCount
+ name: Decisions
+ type: integer
+ - jsonPath: .spec.decisions[-1].eventType
+ name: Latest Event
+ type: string
+ - jsonPath: .status.globalDescription
+ name: Description
+ type: string
+ name: v1alpha1
+ schema:
+ openAPIV3Schema:
+ description: SchedulingDecision is the Schema for the schedulingdecisions
+ API
+ properties:
+ apiVersion:
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+ type: string
+ kind:
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ metadata:
+ type: object
+ spec:
+ description: spec defines the desired state of SchedulingDecision
+ properties:
+ decisions:
+ items:
+ properties:
+ availabilityZone:
+ type: string
+ eventType:
+ type: string
+ flavor:
+ properties:
+ name:
+ type: string
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ type: object
+ required:
+ - name
+ type: object
+ id:
+ type: string
+ input:
+ additionalProperties:
+ type: number
+ type: object
+ pipeline:
+ properties:
+ name:
+ type: string
+ outputs:
+ items:
+ properties:
+ activations:
+ additionalProperties:
+ type: number
+ type: object
+ step:
+ type: string
+ required:
+ - step
+ type: object
+ type: array
+ required:
+ - name
+ type: object
+ requestedAt:
+ format: date-time
+ type: string
+ required:
+ - eventType
+ - id
+ - pipeline
+ - requestedAt
+ type: object
+ type: array
+ required:
+ - decisions
+ type: object
+ status:
+ description: status defines the observed state of SchedulingDecision
+ properties:
+ decisionCount:
+ type: integer
+ error:
+ type: string
+ globalDescription:
+ type: string
+ results:
+ items:
+ description: SchedulingDecisionResult represents the result of processing
+ a single decision request.
+ properties:
+ deletedHosts:
+ additionalProperties:
+ items:
+ type: string
+ type: array
+ description: Hosts that were deleted during pipeline processing
+ and all steps that attempted to delete them.
+ type: object
+ description:
+ type: string
+ finalScores:
+ additionalProperties:
+ type: number
+ description: Final scores for each host after processing all
+ pipeline steps.
+ type: object
+ id:
+ type: string
+ required:
+ - id
+ type: object
+ type: array
+ state:
+ type: string
+ type: object
+ required:
+ - spec
+ type: object
+ served: true
+ storage: true
+ subresources:
+ status: {}
+{{- end -}}
diff --git a/decisions/dist/chart/templates/manager/manager.yaml b/decisions/dist/chart/templates/manager/manager.yaml
new file mode 100644
index 00000000..060d2e6f
--- /dev/null
+++ b/decisions/dist/chart/templates/manager/manager.yaml
@@ -0,0 +1,107 @@
+# This file is protected from being overwritten by `kubebuilder edit --plugins=helm/v1-alpha`.
+# If you want to re-generate it, add the --force flag.
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: decisions-controller-manager
+ namespace: {{ .Release.Namespace }}
+ labels:
+ {{- include "chart.labels" . | nindent 4 }}
+ control-plane: controller-manager
+spec:
+ replicas: {{ .Values.controllerManager.replicas }}
+ selector:
+ matchLabels:
+ {{- include "chart.selectorLabels" . | nindent 6 }}
+ control-plane: controller-manager
+ template:
+ metadata:
+ annotations:
+ kubectl.kubernetes.io/default-container: manager
+ labels:
+ {{- include "chart.labels" . | nindent 8 }}
+ control-plane: controller-manager
+ {{- if and .Values.controllerManager.pod .Values.controllerManager.pod.labels }}
+ {{- range $key, $value := .Values.controllerManager.pod.labels }}
+ {{ $key }}: {{ $value }}
+ {{- end }}
+ {{- end }}
+ spec:
+ containers:
+ - name: manager
+ args:
+ {{- range .Values.controllerManager.container.args }}
+ - {{ . }}
+ {{- end }}
+ command:
+ - /manager
+ image: {{ .Values.controllerManager.container.image.repository }}:{{ .Values.controllerManager.container.image.tag | default .Chart.AppVersion }}
+ {{- if .Values.controllerManager.container.env }}
+ env:
+ {{- range $key, $value := .Values.controllerManager.container.env }}
+ - name: {{ $key }}
+ value: {{ $value }}
+ {{- end }}
+ {{- end }}
+ livenessProbe:
+ {{- toYaml .Values.controllerManager.container.livenessProbe | nindent 12 }}
+ readinessProbe:
+ {{- toYaml .Values.controllerManager.container.readinessProbe | nindent 12 }}
+ resources:
+ {{- toYaml .Values.controllerManager.container.resources | nindent 12 }}
+ securityContext:
+ {{- toYaml .Values.controllerManager.container.securityContext | nindent 12 }}
+ volumeMounts:
+ - name: decisions-controller-manager-config-volume
+ mountPath: /etc/config
+ - name: decisions-controller-manager-secrets-volume
+ mountPath: /etc/secrets
+ readOnly: true
+ {{- if and .Values.metrics.enable .Values.certmanager.enable }}
+ - name: metrics-certs
+ mountPath: /tmp/k8s-metrics-server/metrics-certs
+ readOnly: true
+ {{- end }}
+ securityContext:
+ {{- toYaml .Values.controllerManager.securityContext | nindent 8 }}
+ serviceAccountName: {{ .Values.controllerManager.serviceAccountName }}
+ terminationGracePeriodSeconds: {{ .Values.controllerManager.terminationGracePeriodSeconds }}
+ volumes:
+ # Custom values to configure the controller-manager.
+ - name: decisions-controller-manager-config-volume
+ configMap:
+ name: decisions-controller-manager-config
+ - name: decisions-controller-manager-secrets-volume
+ secret:
+ secretName: decisions-controller-manager-secrets
+ {{- if and .Values.metrics.enable .Values.certmanager.enable }}
+ - name: metrics-certs
+ secret:
+ secretName: metrics-server-cert
+ {{- end }}
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: decisions-controller-manager-config
+data:
+ conf.json: |-
+ {{- if .Values.decisions.conf }}
+ {{ toJson .Values.decisions.conf }}
+ {{- else }}
+ {}
+ {{- end }}
+---
+apiVersion: v1
+kind: Secret
+metadata:
+ name: decisions-controller-manager-secrets
+type: Opaque
+data:
+ secrets.json: |-
+ {{- if .Values.decisions.secrets }}
+ {{ toJson .Values.decisions.secrets | b64enc }}
+ {{- else }}
+ {{ "{}" | b64enc }}
+ {{- end }}
\ No newline at end of file
diff --git a/decisions/dist/chart/templates/metrics/metrics-service.yaml b/decisions/dist/chart/templates/metrics/metrics-service.yaml
new file mode 100644
index 00000000..818e728d
--- /dev/null
+++ b/decisions/dist/chart/templates/metrics/metrics-service.yaml
@@ -0,0 +1,18 @@
+{{- if .Values.metrics.enable }}
+apiVersion: v1
+kind: Service
+metadata:
+ name: decisions-controller-manager-metrics-service
+ namespace: {{ .Release.Namespace }}
+ labels:
+ {{- include "chart.labels" . | nindent 4 }}
+ control-plane: controller-manager
+spec:
+ ports:
+ - port: 8443
+ targetPort: 8443
+ protocol: TCP
+ name: https
+ selector:
+ control-plane: controller-manager
+{{- end }}
diff --git a/decisions/dist/chart/templates/network-policy/allow-metrics-traffic.yaml b/decisions/dist/chart/templates/network-policy/allow-metrics-traffic.yaml
new file mode 100644
index 00000000..9d54a550
--- /dev/null
+++ b/decisions/dist/chart/templates/network-policy/allow-metrics-traffic.yaml
@@ -0,0 +1,28 @@
+{{- if .Values.networkPolicy.enable }}
+# This NetworkPolicy allows ingress traffic from Pods running in namespaces
+# labeled with 'metrics: enabled'. Only Pods in those namespaces are able to
+# gather data from the metrics endpoint.
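+# For example, to admit scrapers from a (hypothetical) namespace "monitoring":
+#   kubectl label namespace monitoring metrics=enabled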
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+ labels:
+ {{- include "chart.labels" . | nindent 4 }}
+ name: allow-metrics-traffic
+ namespace: {{ .Release.Namespace }}
+spec:
+ podSelector:
+ matchLabels:
+ control-plane: controller-manager
+ app.kubernetes.io/name: decisions
+ policyTypes:
+ - Ingress
+ ingress:
+ # This allows ingress traffic from any namespace with the label metrics: enabled
+ - from:
+ - namespaceSelector:
+ matchLabels:
+ metrics: enabled # Only from namespaces with this label
+ ports:
+ - port: 8443
+ protocol: TCP
+{{- end -}}
diff --git a/decisions/dist/chart/templates/prometheus/monitor.yaml b/decisions/dist/chart/templates/prometheus/monitor.yaml
new file mode 100644
index 00000000..1720ee91
--- /dev/null
+++ b/decisions/dist/chart/templates/prometheus/monitor.yaml
@@ -0,0 +1,40 @@
+# To integrate with Prometheus.
+{{- if .Values.prometheus.enable }}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+ labels:
+ {{- include "chart.labels" . | nindent 4 }}
+ control-plane: controller-manager
+ name: decisions-controller-manager-metrics-monitor
+ namespace: {{ .Release.Namespace }}
+spec:
+ endpoints:
+ - path: /metrics
+ port: https
+ scheme: https
+ bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
+ tlsConfig:
+ {{- if .Values.certmanager.enable }}
+ serverName: decisions-controller-manager-metrics-service.{{ .Release.Namespace }}.svc
+ # Apply secure TLS configuration with cert-manager
+ insecureSkipVerify: false
+ ca:
+ secret:
+ name: metrics-server-cert
+ key: ca.crt
+ cert:
+ secret:
+ name: metrics-server-cert
+ key: tls.crt
+ keySecret:
+ name: metrics-server-cert
+ key: tls.key
+ {{- else }}
+ # Development/Test mode (insecure configuration)
+ insecureSkipVerify: true
+ {{- end }}
+ selector:
+ matchLabels:
+ control-plane: controller-manager
+{{- end }}
diff --git a/decisions/dist/chart/templates/rbac/leader_election_role.yaml b/decisions/dist/chart/templates/rbac/leader_election_role.yaml
new file mode 100644
index 00000000..5e5e2ded
--- /dev/null
+++ b/decisions/dist/chart/templates/rbac/leader_election_role.yaml
@@ -0,0 +1,42 @@
+{{- if .Values.rbac.enable }}
+# Permissions required for leader election.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+ labels:
+ {{- include "chart.labels" . | nindent 4 }}
+ namespace: {{ .Release.Namespace }}
+ name: decisions-leader-election-role
+rules:
+- apiGroups:
+ - ""
+ resources:
+ - configmaps
+ verbs:
+ - get
+ - list
+ - watch
+ - create
+ - update
+ - patch
+ - delete
+- apiGroups:
+ - coordination.k8s.io
+ resources:
+ - leases
+ verbs:
+ - get
+ - list
+ - watch
+ - create
+ - update
+ - patch
+ - delete
+- apiGroups:
+ - ""
+ resources:
+ - events
+ verbs:
+ - create
+ - patch
+{{- end -}}
diff --git a/decisions/dist/chart/templates/rbac/leader_election_role_binding.yaml b/decisions/dist/chart/templates/rbac/leader_election_role_binding.yaml
new file mode 100644
index 00000000..a4be63be
--- /dev/null
+++ b/decisions/dist/chart/templates/rbac/leader_election_role_binding.yaml
@@ -0,0 +1,17 @@
+{{- if .Values.rbac.enable }}
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+ labels:
+ {{- include "chart.labels" . | nindent 4 }}
+ namespace: {{ .Release.Namespace }}
+ name: decisions-leader-election-rolebinding
+roleRef:
+ apiGroup: rbac.authorization.k8s.io
+ kind: Role
+ name: decisions-leader-election-role
+subjects:
+- kind: ServiceAccount
+ name: {{ .Values.controllerManager.serviceAccountName }}
+ namespace: {{ .Release.Namespace }}
+{{- end -}}
diff --git a/decisions/dist/chart/templates/rbac/metrics_auth_role.yaml b/decisions/dist/chart/templates/rbac/metrics_auth_role.yaml
new file mode 100644
index 00000000..8ed40055
--- /dev/null
+++ b/decisions/dist/chart/templates/rbac/metrics_auth_role.yaml
@@ -0,0 +1,21 @@
+{{- if and .Values.rbac.enable .Values.metrics.enable }}
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+ labels:
+ {{- include "chart.labels" . | nindent 4 }}
+ name: decisions-metrics-auth-role
+rules:
+- apiGroups:
+ - authentication.k8s.io
+ resources:
+ - tokenreviews
+ verbs:
+ - create
+- apiGroups:
+ - authorization.k8s.io
+ resources:
+ - subjectaccessreviews
+ verbs:
+ - create
+{{- end -}}
diff --git a/decisions/dist/chart/templates/rbac/metrics_auth_role_binding.yaml b/decisions/dist/chart/templates/rbac/metrics_auth_role_binding.yaml
new file mode 100644
index 00000000..d3ca3c7e
--- /dev/null
+++ b/decisions/dist/chart/templates/rbac/metrics_auth_role_binding.yaml
@@ -0,0 +1,16 @@
+{{- if and .Values.rbac.enable .Values.metrics.enable }}
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+ labels:
+ {{- include "chart.labels" . | nindent 4 }}
+ name: decisions-metrics-auth-rolebinding
+roleRef:
+ apiGroup: rbac.authorization.k8s.io
+ kind: ClusterRole
+ name: decisions-metrics-auth-role
+subjects:
+- kind: ServiceAccount
+ name: {{ .Values.controllerManager.serviceAccountName }}
+ namespace: {{ .Release.Namespace }}
+{{- end -}}
diff --git a/decisions/dist/chart/templates/rbac/metrics_reader_role.yaml b/decisions/dist/chart/templates/rbac/metrics_reader_role.yaml
new file mode 100644
index 00000000..81f7da70
--- /dev/null
+++ b/decisions/dist/chart/templates/rbac/metrics_reader_role.yaml
@@ -0,0 +1,13 @@
+{{- if and .Values.rbac.enable .Values.metrics.enable }}
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+ labels:
+ {{- include "chart.labels" . | nindent 4 }}
+ name: decisions-metrics-reader
+rules:
+- nonResourceURLs:
+ - "/metrics"
+ verbs:
+ - get
+{{- end -}}
diff --git a/decisions/dist/chart/templates/rbac/role.yaml b/decisions/dist/chart/templates/rbac/role.yaml
new file mode 100644
index 00000000..b93e56fc
--- /dev/null
+++ b/decisions/dist/chart/templates/rbac/role.yaml
@@ -0,0 +1,36 @@
+{{- if .Values.rbac.enable }}
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+ labels:
+ {{- include "chart.labels" . | nindent 4 }}
+ name: decisions-manager-role
+rules:
+- apiGroups:
+ - decisions.cortex
+ resources:
+ - schedulingdecisions
+ verbs:
+ - create
+ - delete
+ - get
+ - list
+ - patch
+ - update
+ - watch
+- apiGroups:
+ - decisions.cortex
+ resources:
+ - schedulingdecisions/finalizers
+ verbs:
+ - update
+- apiGroups:
+ - decisions.cortex
+ resources:
+ - schedulingdecisions/status
+ verbs:
+ - get
+ - patch
+ - update
+{{- end -}}
diff --git a/decisions/dist/chart/templates/rbac/role_binding.yaml b/decisions/dist/chart/templates/rbac/role_binding.yaml
new file mode 100644
index 00000000..09804a2a
--- /dev/null
+++ b/decisions/dist/chart/templates/rbac/role_binding.yaml
@@ -0,0 +1,16 @@
+{{- if .Values.rbac.enable }}
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+ labels:
+ {{- include "chart.labels" . | nindent 4 }}
+ name: decisions-manager-rolebinding
+roleRef:
+ apiGroup: rbac.authorization.k8s.io
+ kind: ClusterRole
+ name: decisions-manager-role
+subjects:
+- kind: ServiceAccount
+ name: {{ .Values.controllerManager.serviceAccountName }}
+ namespace: {{ .Release.Namespace }}
+{{- end -}}
diff --git a/decisions/dist/chart/templates/rbac/schedulingdecision_admin_role.yaml b/decisions/dist/chart/templates/rbac/schedulingdecision_admin_role.yaml
new file mode 100644
index 00000000..6db64811
--- /dev/null
+++ b/decisions/dist/chart/templates/rbac/schedulingdecision_admin_role.yaml
@@ -0,0 +1,28 @@
+{{- if .Values.rbac.enable }}
+# This role is not used by the decisions project itself.
+# It is provided to help the cluster admin manage permissions for users.
+#
+# Grants full permissions ('*') over decisions.cortex resources.
+# This role is intended for users authorized to modify roles and bindings within the cluster,
+# enabling them to delegate specific permissions to other users or groups as needed.
+
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+ labels:
+ {{- include "chart.labels" . | nindent 4 }}
+ name: schedulingdecision-admin-role
+rules:
+- apiGroups:
+ - decisions.cortex
+ resources:
+ - schedulingdecisions
+ verbs:
+ - '*'
+- apiGroups:
+ - decisions.cortex
+ resources:
+ - schedulingdecisions/status
+ verbs:
+ - get
+{{- end -}}
diff --git a/decisions/dist/chart/templates/rbac/schedulingdecision_editor_role.yaml b/decisions/dist/chart/templates/rbac/schedulingdecision_editor_role.yaml
new file mode 100644
index 00000000..7a82611c
--- /dev/null
+++ b/decisions/dist/chart/templates/rbac/schedulingdecision_editor_role.yaml
@@ -0,0 +1,34 @@
+{{- if .Values.rbac.enable }}
+# This role is not used by the decisions project itself.
+# It is provided to help the cluster admin manage permissions for users.
+#
+# Grants permissions to create, update, and delete resources within the
+# decisions.cortex API group. This role is intended for users who need to
+# manage these resources but should not control RBAC or manage permissions
+# for others.
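+#
+# For example, it could be granted to a (hypothetical) user "alice" with:
+#   kubectl create clusterrolebinding alice-schedulingdecision-editor \
+#     --clusterrole=schedulingdecision-editor-role --user=alice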
+
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+ labels:
+ {{- include "chart.labels" . | nindent 4 }}
+ name: schedulingdecision-editor-role
+rules:
+- apiGroups:
+ - decisions.cortex
+ resources:
+ - schedulingdecisions
+ verbs:
+ - create
+ - delete
+ - get
+ - list
+ - patch
+ - update
+ - watch
+- apiGroups:
+ - decisions.cortex
+ resources:
+ - schedulingdecisions/status
+ verbs:
+ - get
+{{- end -}}
diff --git a/decisions/dist/chart/templates/rbac/schedulingdecision_viewer_role.yaml b/decisions/dist/chart/templates/rbac/schedulingdecision_viewer_role.yaml
new file mode 100644
index 00000000..4375bd65
--- /dev/null
+++ b/decisions/dist/chart/templates/rbac/schedulingdecision_viewer_role.yaml
@@ -0,0 +1,30 @@
+{{- if .Values.rbac.enable }}
+# This role is not used by the decisions project itself.
+# It is provided to help the cluster admin manage permissions for users.
+#
+# Grants read-only access to decisions.cortex resources.
+# This role is intended for users who need visibility into these resources
+# without permissions to modify them. It is ideal for monitoring purposes and limited-access viewing.
+
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+ labels:
+ {{- include "chart.labels" . | nindent 4 }}
+ name: schedulingdecision-viewer-role
+rules:
+- apiGroups:
+ - decisions.cortex
+ resources:
+ - schedulingdecisions
+ verbs:
+ - get
+ - list
+ - watch
+- apiGroups:
+ - decisions.cortex
+ resources:
+ - schedulingdecisions/status
+ verbs:
+ - get
+{{- end -}}
diff --git a/decisions/dist/chart/templates/rbac/service_account.yaml b/decisions/dist/chart/templates/rbac/service_account.yaml
new file mode 100644
index 00000000..93e0a323
--- /dev/null
+++ b/decisions/dist/chart/templates/rbac/service_account.yaml
@@ -0,0 +1,15 @@
+{{- if .Values.rbac.enable }}
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+ labels:
+ {{- include "chart.labels" . | nindent 4 }}
+ {{- if and .Values.controllerManager.serviceAccount .Values.controllerManager.serviceAccount.annotations }}
+ annotations:
+ {{- range $key, $value := .Values.controllerManager.serviceAccount.annotations }}
+ {{ $key }}: {{ $value }}
+ {{- end }}
+ {{- end }}
+ name: {{ .Values.controllerManager.serviceAccountName }}
+ namespace: {{ .Release.Namespace }}
+{{- end -}}
diff --git a/decisions/dist/chart/values.yaml b/decisions/dist/chart/values.yaml
new file mode 100644
index 00000000..8ff57f39
--- /dev/null
+++ b/decisions/dist/chart/values.yaml
@@ -0,0 +1,131 @@
+# This file is safe from being overwritten by `kubebuilder edit --plugins=helm/v1-alpha`.
+# If you want to re-generate it, add the --force flag.
+
+owner-info:
+ enabled: true
+ helm-chart-url: "https://github.com/cobaltcore-dev/cortex/decisions/dist/chart"
+ maintainers:
+ - "p.matthes@sap.com"
+ - "markus.wieland@sap.com"
+ - "arno.uhlig@sap.com"
+ support-group: "workload-management"
+ service: "cortex-decisions"
+
+# [MANAGER]: Manager Deployment Configurations
+controllerManager:
+ replicas: 1
+ container:
+ image:
+ repository: ghcr.io/cobaltcore-dev/cortex-decisions-operator
+ args:
+ - "--leader-elect"
+ - "--metrics-bind-address=:8443"
+ - "--health-probe-bind-address=:8081"
+ resources:
+ limits:
+ cpu: 500m
+ memory: 512Mi
+ requests:
+ cpu: 10m
+ memory: 64Mi
+ livenessProbe:
+ initialDelaySeconds: 15
+ periodSeconds: 20
+ httpGet:
+ path: /healthz
+ port: 8081
+ readinessProbe:
+ initialDelaySeconds: 5
+ periodSeconds: 10
+ httpGet:
+ path: /readyz
+ port: 8081
+ securityContext:
+ allowPrivilegeEscalation: false
+ capabilities:
+ drop:
+ - "ALL"
+ securityContext:
+ runAsNonRoot: true
+ seccompProfile:
+ type: RuntimeDefault
+ terminationGracePeriodSeconds: 10
+ serviceAccountName: decisions-controller-manager
+
+# [RBAC]: To enable RBAC (Permissions) configurations
+rbac:
+ enable: true
+
+# [CRDs]: To enable the CRDs
+crd:
+ # This option determines whether the CRDs are included
+ # in the installation process.
+ enable: true
+
+ # Enabling this option adds the "helm.sh/resource-policy": keep
+ # annotation to the CRD, ensuring it remains installed even when
+ # the Helm release is uninstalled.
+ # NOTE: Removing the CRDs will also remove all cert-manager CR(s)
+ # (Certificates, Issuers, ...) due to garbage collection.
+ keep: true
+
+# [METRICS]: Set to true to generate manifests for exporting metrics.
+# To disable metrics export, set this to false and ensure that the
+# ControllerManager argument "--metrics-bind-address=:8443" is removed.
+metrics:
+ enable: true
+
+# [PROMETHEUS]: Set to true to enable a ServiceMonitor that exports metrics to Prometheus
+prometheus:
+ enable: true
+
+# [CERT-MANAGER]: Set to true to enable cert-manager certificate injection for webhooks
+certmanager:
+ enable: false
+
+# [NETWORK POLICIES]: Set to true to enable NetworkPolicies
+networkPolicy:
+ enable: false
+
+# SSO certificate to use.
+sharedSSOCert: &sharedSSOCert
+ # Certificate "public key". (Optional, remove this key if not needed)
+ cert: |
+ -----BEGIN CERTIFICATE-----
+ Your certificate here
+ -----END CERTIFICATE-----
+ # Certificate private key. (Optional, remove this key if not needed)
+ certKey: |
+ -----BEGIN PRIVATE KEY-----
+ Your private key here
+    -----END PRIVATE KEY-----
+ # Whether the certificate is self-signed.
+ # If true, the certificate is not verified.
+ selfSigned: false
+
+decisions:
+ # Default configuration provided through configmap to the operator.
+ conf:
+ # Which hypervisor types should be handled by the operator.
+ hypervisors:
+ - "QEMU"
+ - "CH"
+ # Not supported:
+ # - "VMware vCenter Server"
+ # - "ironic"
+ endpoints:
+ # The URL of the Nova external scheduler service.
+ novaExternalScheduler: "http://cortex-nova-scheduler:8080/scheduler/nova/external"
+    # TTL in seconds for scheduling decisions after the last decision's
+    # RequestedAt timestamp (86400 = 24 hours).
+    ttlAfterDecisionSeconds: 86400
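+    # For illustration: the chart's ConfigMap template renders the map above
+    # via toJson into /etc/config/conf.json, roughly as
+    # {"endpoints":{...},"hypervisors":["QEMU","CH"],"ttlAfterDecisionSeconds":86400}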
+ # Config provided here will override the config provided above.
+ secrets:
+ # Override the endpoints and credentials to your OpenStack.
+ keystone:
+ url: https://path-to-keystone/v3
+ sso: *sharedSSOCert
+ username: openstack-user-with-all-project-read-access
+ password: openstack-user-password
+ projectName: openstack-project-of-user
+ userDomainName: openstack-domain-of-user
+ projectDomainName: openstack-domain-of-project-scoped-to
diff --git a/decisions/go.mod b/decisions/go.mod
new file mode 100644
index 00000000..7d25ec49
--- /dev/null
+++ b/decisions/go.mod
@@ -0,0 +1,105 @@
+module github.com/cobaltcore-dev/cortex/decisions
+
+go 1.25.0
+
+replace (
+ github.com/cobaltcore-dev/cortex => ../
+ github.com/cobaltcore-dev/cortex/decisions/api => ./api
+)
+
+require (
+ github.com/cobaltcore-dev/cortex v0.0.0-00010101000000-000000000000
+ github.com/cobaltcore-dev/cortex/decisions/api v0.0.0-00010101000000-000000000000
+ k8s.io/apimachinery v0.34.1
+ k8s.io/client-go v0.34.1
+ sigs.k8s.io/controller-runtime v0.22.1
+)
+
+require (
+ github.com/pmezard/go-difflib v1.0.0 // indirect
+ go.yaml.in/yaml/v2 v2.4.2 // indirect
+ go.yaml.in/yaml/v3 v3.0.4 // indirect
+ sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect
+)
+
+require (
+ cel.dev/expr v0.24.0 // indirect
+ github.com/antlr4-go/antlr/v4 v4.13.0 // indirect
+ github.com/beorn7/perks v1.0.1 // indirect
+ github.com/blang/semver/v4 v4.0.0 // indirect
+ github.com/cenkalti/backoff/v4 v4.3.0 // indirect
+ github.com/cespare/xxhash/v2 v2.3.0 // indirect
+ github.com/davecgh/go-spew v1.1.1 // indirect
+ github.com/emicklei/go-restful/v3 v3.12.2 // indirect
+ github.com/evanphx/json-patch/v5 v5.9.11 // indirect
+ github.com/felixge/httpsnoop v1.0.4 // indirect
+ github.com/fsnotify/fsnotify v1.9.0 // indirect
+ github.com/fxamacker/cbor/v2 v2.9.0 // indirect
+ github.com/go-logr/logr v1.4.3 // indirect
+ github.com/go-logr/stdr v1.2.2 // indirect
+ github.com/go-logr/zapr v1.3.0 // indirect
+ github.com/go-openapi/jsonpointer v0.21.0 // indirect
+ github.com/go-openapi/jsonreference v0.20.2 // indirect
+ github.com/go-openapi/swag v0.23.0 // indirect
+ github.com/gogo/protobuf v1.3.2 // indirect
+ github.com/google/btree v1.1.3 // indirect
+ github.com/google/cel-go v0.26.0 // indirect
+ github.com/google/gnostic-models v0.7.0 // indirect
+ github.com/google/go-cmp v0.7.0 // indirect
+ github.com/google/uuid v1.6.0 // indirect
+ github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 // indirect
+ github.com/inconshreveable/mousetrap v1.1.0 // indirect
+ github.com/josharian/intern v1.0.0 // indirect
+ github.com/json-iterator/go v1.1.12 // indirect
+ github.com/mailru/easyjson v0.7.7 // indirect
+ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
+ github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect
+ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
+ github.com/pkg/errors v0.9.1 // indirect
+ github.com/prometheus/client_golang v1.23.2 // indirect
+ github.com/prometheus/client_model v0.6.2 // indirect
+ github.com/prometheus/common v0.66.1 // indirect
+ github.com/prometheus/procfs v0.17.0 // indirect
+ github.com/spf13/cobra v1.9.1 // indirect
+ github.com/spf13/pflag v1.0.6 // indirect
+ github.com/stoewer/go-strcase v1.3.0 // indirect
+ github.com/x448/float16 v0.8.4 // indirect
+ go.opentelemetry.io/auto/sdk v1.1.0 // indirect
+ go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 // indirect
+ go.opentelemetry.io/otel v1.37.0 // indirect
+ go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.34.0 // indirect
+ go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.34.0 // indirect
+ go.opentelemetry.io/otel/metric v1.37.0 // indirect
+ go.opentelemetry.io/otel/sdk v1.35.0 // indirect
+ go.opentelemetry.io/otel/trace v1.37.0 // indirect
+ go.opentelemetry.io/proto/otlp v1.5.0 // indirect
+ go.uber.org/multierr v1.11.0 // indirect
+ go.uber.org/zap v1.27.0 // indirect
+ golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect
+ golang.org/x/net v0.44.0 // indirect
+ golang.org/x/oauth2 v0.30.0 // indirect
+ golang.org/x/sync v0.17.0 // indirect
+ golang.org/x/sys v0.36.0 // indirect
+ golang.org/x/term v0.35.0 // indirect
+ golang.org/x/text v0.29.0 // indirect
+ golang.org/x/time v0.12.0 // indirect
+ gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
+ google.golang.org/genproto/googleapis/api v0.0.0-20250303144028-a0af3efb3deb // indirect
+ google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb // indirect
+ google.golang.org/grpc v1.72.1 // indirect
+ google.golang.org/protobuf v1.36.8 // indirect
+ gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
+ gopkg.in/inf.v0 v0.9.1 // indirect
+ gopkg.in/yaml.v3 v3.0.1 // indirect
+ k8s.io/api v0.34.1 // indirect
+ k8s.io/apiextensions-apiserver v0.34.0 // indirect
+ k8s.io/apiserver v0.34.0 // indirect
+ k8s.io/component-base v0.34.0 // indirect
+ k8s.io/klog/v2 v2.130.1 // indirect
+ k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b // indirect
+ k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 // indirect
+ sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 // indirect
+ sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect
+ sigs.k8s.io/randfill v1.0.0 // indirect
+ sigs.k8s.io/yaml v1.6.0 // indirect
+)
diff --git a/decisions/go.sum b/decisions/go.sum
new file mode 100644
index 00000000..bc272d7d
--- /dev/null
+++ b/decisions/go.sum
@@ -0,0 +1,259 @@
+cel.dev/expr v0.24.0 h1:56OvJKSH3hDGL0ml5uSxZmz3/3Pq4tJ+fb1unVLAFcY=
+cel.dev/expr v0.24.0/go.mod h1:hLPLo1W4QUmuYdA72RBX06QTs6MXw941piREPl3Yfiw=
+github.com/antlr4-go/antlr/v4 v4.13.0 h1:lxCg3LAv+EUK6t1i0y1V6/SLeUi0eKEKdhQAlS8TVTI=
+github.com/antlr4-go/antlr/v4 v4.13.0/go.mod h1:pfChB/xh/Unjila75QW7+VU4TSnWnnk9UTnmpPaOR2g=
+github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
+github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
+github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
+github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
+github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
+github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
+github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
+github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
+github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
+github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/emicklei/go-restful/v3 v3.12.2 h1:DhwDP0vY3k8ZzE0RunuJy8GhNpPL6zqLkDf9B/a0/xU=
+github.com/emicklei/go-restful/v3 v3.12.2/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
+github.com/evanphx/json-patch v0.5.2 h1:xVCHIVMUu1wtM/VkR9jVZ45N3FhZfYMMYGorLCR8P3k=
+github.com/evanphx/json-patch v0.5.2/go.mod h1:ZWS5hhDbVDyob71nXKNL0+PWn6ToqBHMikGIFbs31qQ=
+github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU=
+github.com/evanphx/json-patch/v5 v5.9.11/go.mod h1:3j+LviiESTElxA4p3EMKAB9HXj3/XEtnUf6OZxqIQTM=
+github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
+github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
+github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
+github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
+github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM=
+github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ=
+github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
+github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
+github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
+github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
+github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
+github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ=
+github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg=
+github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs=
+github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ=
+github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY=
+github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE=
+github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k=
+github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14=
+github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE=
+github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ=
+github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
+github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
+github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
+github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
+github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
+github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
+github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg=
+github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4=
+github.com/google/cel-go v0.26.0 h1:DPGjXackMpJWH680oGY4lZhYjIameYmR+/6RBdDGmaI=
+github.com/google/cel-go v0.26.0/go.mod h1:A9O8OU9rdvrK5MQyrqfIxo1a0u4g3sF8KB6PUIaryMM=
+github.com/google/gnostic-models v0.7.0 h1:qwTtogB15McXDaNqTZdzPJRHvaVJlAl+HVQnLmJEJxo=
+github.com/google/gnostic-models v0.7.0/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ=
+github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
+github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
+github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
+github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
+github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
+github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgYQBbFN4U4JNXUNYpxael3UzMyo=
+github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144=
+github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
+github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 h1:5ZPtiqj0JL5oKWmcsq4VMaAW5ukBEgSGXEN89zeH1Jo=
+github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3/go.mod h1:ndYquD05frm2vACXE1nsccT4oJzjhw2arTS2cpUD1PI=
+github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
+github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
+github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
+github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
+github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
+github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
+github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
+github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
+github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
+github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
+github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
+github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
+github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
+github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
+github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
+github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
+github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
+github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
+github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8=
+github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
+github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
+github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
+github.com/onsi/ginkgo/v2 v2.22.0 h1:Yed107/8DjTr0lKCNt7Dn8yQ6ybuDRQoMGrNFKzMfHg=
+github.com/onsi/ginkgo/v2 v2.22.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo=
+github.com/onsi/gomega v1.36.1 h1:bJDPBO7ibjxcbHMgSCoo4Yj18UWbKDlLwX1x9sybDcw=
+github.com/onsi/gomega v1.36.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog=
+github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
+github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o=
+github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg=
+github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
+github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
+github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs=
+github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA=
+github.com/prometheus/procfs v0.17.0 h1:FuLQ+05u4ZI+SS/w9+BWEM2TXiHKsUQ9TADiRH7DuK0=
+github.com/prometheus/procfs v0.17.0/go.mod h1:oPQLaDAMRbA+u8H5Pbfq+dl3VDAvHxMUOVhe0wYB2zw=
+github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
+github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
+github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
+github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo=
+github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0=
+github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o=
+github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
+github.com/stoewer/go-strcase v1.3.0 h1:g0eASXYtp+yvN9fK8sH94oCIk0fau9uV1/ZdJ0AVEzs=
+github.com/stoewer/go-strcase v1.3.0/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8wodgtPmh1xo=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
+github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
+github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
+github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
+github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
+github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
+github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
+github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
+github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
+github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
+github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
+go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 h1:sbiXRNDSWJOTobXh5HyQKjq6wUC5tNybqjIqDpAY4CU=
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0/go.mod h1:69uWxva0WgAA/4bu2Yy70SLDBwZXuQ6PbBpbsa5iZrQ=
+go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ=
+go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.34.0 h1:OeNbIYk/2C15ckl7glBlOBp5+WlYsOElzTNmiPW/x60=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.34.0/go.mod h1:7Bept48yIeqxP2OZ9/AqIpYS94h2or0aB4FypJTc8ZM=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.34.0 h1:tgJ0uaNS4c98WRNUEx5U3aDlrDOI5Rs+1Vifcw4DJ8U=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.34.0/go.mod h1:U7HYyW0zt/a9x5J1Kjs+r1f/d4ZHnYFclhYY2+YbeoE=
+go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE=
+go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E=
+go.opentelemetry.io/otel/sdk v1.35.0 h1:iPctf8iprVySXSKJffSS79eOjl9pvxV9ZqOWT0QejKY=
+go.opentelemetry.io/otel/sdk v1.35.0/go.mod h1:+ga1bZliga3DxJ3CQGg3updiaAJoNECOgJREo9KHGQg=
+go.opentelemetry.io/otel/sdk/metric v1.35.0 h1:1RriWBmCKgkeHEhM7a2uMjMUfP7MsOF5JpUCaEqEI9o=
+go.opentelemetry.io/otel/sdk/metric v1.35.0/go.mod h1:is6XYCUMpcKi+ZsOvfluY5YstFnhW0BidkR+gL+qN+w=
+go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4=
+go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0=
+go.opentelemetry.io/proto/otlp v1.5.0 h1:xJvq7gMzB31/d406fB8U5CBdyQGw4P399D1aQWU/3i4=
+go.opentelemetry.io/proto/otlp v1.5.0/go.mod h1:keN8WnHxOy8PG0rQZjJJ5A2ebUoafqWp0eVQ4yIXvJ4=
+go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
+go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
+go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
+go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
+go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8=
+go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=
+go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI=
+go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU=
+go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
+go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8=
+golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY=
+golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
+golang.org/x/net v0.44.0 h1:evd8IRDyfNBMBTTY5XRF1vaZlD+EmWx6x8PkhR04H/I=
+golang.org/x/net v0.44.0/go.mod h1:ECOoLqd5U3Lhyeyo/QDCEVQ4sNgYsqvCZ722XogGieY=
+golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI=
+golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug=
+golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k=
+golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
+golang.org/x/term v0.35.0 h1:bZBVKBudEyhRcajGcNc3jIfWPqV4y/Kt2XcoigOWtDQ=
+golang.org/x/term v0.35.0/go.mod h1:TPGtkTLesOwf2DE8CgVYiZinHAOuy5AYUYT1lENIZnA=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk=
+golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4=
+golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE=
+golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
+golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
+golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg=
+golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw=
+gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY=
+google.golang.org/genproto/googleapis/api v0.0.0-20250303144028-a0af3efb3deb h1:p31xT4yrYrSM/G4Sn2+TNUkVhFCbG9y8itM2S6Th950=
+google.golang.org/genproto/googleapis/api v0.0.0-20250303144028-a0af3efb3deb/go.mod h1:jbe3Bkdp+Dh2IrslsFCklNhweNTBgSYanP1UXhJDhKg=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb h1:TLPQVbx1GJ8VKZxz52VAxl1EBgKXXbTiU9Fc5fZeLn4=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb/go.mod h1:LuRYeWDFV6WOn90g357N17oMCaxpgCnbi/44qJvDn2I=
+google.golang.org/grpc v1.72.1 h1:HR03wO6eyZ7lknl75XlxABNVLLFc2PAb6mHlYh756mA=
+google.golang.org/grpc v1.72.1/go.mod h1:wH5Aktxcg25y1I3w7H69nHfXdOG3UiadoBtjh3izSDM=
+google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc=
+google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
+gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4=
+gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M=
+gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
+gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+k8s.io/api v0.34.1 h1:jC+153630BMdlFukegoEL8E/yT7aLyQkIVuwhmwDgJM=
+k8s.io/api v0.34.1/go.mod h1:SB80FxFtXn5/gwzCoN6QCtPD7Vbu5w2n1S0J5gFfTYk=
+k8s.io/apiextensions-apiserver v0.34.0 h1:B3hiB32jV7BcyKcMU5fDaDxk882YrJ1KU+ZSkA9Qxoc=
+k8s.io/apiextensions-apiserver v0.34.0/go.mod h1:hLI4GxE1BDBy9adJKxUxCEHBGZtGfIg98Q+JmTD7+g0=
+k8s.io/apimachinery v0.34.1 h1:dTlxFls/eikpJxmAC7MVE8oOeP1zryV7iRyIjB0gky4=
+k8s.io/apimachinery v0.34.1/go.mod h1:/GwIlEcWuTX9zKIg2mbw0LRFIsXwrfoVxn+ef0X13lw=
+k8s.io/apiserver v0.34.0 h1:Z51fw1iGMqN7uJ1kEaynf2Aec1Y774PqU+FVWCFV3Jg=
+k8s.io/apiserver v0.34.0/go.mod h1:52ti5YhxAvewmmpVRqlASvaqxt0gKJxvCeW7ZrwgazQ=
+k8s.io/client-go v0.34.1 h1:ZUPJKgXsnKwVwmKKdPfw4tB58+7/Ik3CrjOEhsiZ7mY=
+k8s.io/client-go v0.34.1/go.mod h1:kA8v0FP+tk6sZA0yKLRG67LWjqufAoSHA2xVGKw9Of8=
+k8s.io/component-base v0.34.0 h1:bS8Ua3zlJzapklsB1dZgjEJuJEeHjj8yTu1gxE2zQX8=
+k8s.io/component-base v0.34.0/go.mod h1:RSCqUdvIjjrEm81epPcjQ/DS+49fADvGSCkIP3IC6vg=
+k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
+k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
+k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b h1:MloQ9/bdJyIu9lb1PzujOPolHyvO06MXG5TUIj2mNAA=
+k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b/go.mod h1:UZ2yyWbFTpuhSbFhv24aGNOdoRdJZgsIObGBUaYVsts=
+k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 h1:hwvWFiBzdWw1FhfY1FooPn3kzWuJ8tmbZBHi4zVsl1Y=
+k8s.io/utils v0.0.0-20250604170112-4c0f3b243397/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
+sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 h1:jpcvIRr3GLoUoEKRkHKSmGjxb6lWwrBlJsXc+eUYQHM=
+sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw=
+sigs.k8s.io/controller-runtime v0.22.1 h1:Ah1T7I+0A7ize291nJZdS1CabF/lB4E++WizgV24Eqg=
+sigs.k8s.io/controller-runtime v0.22.1/go.mod h1:FwiwRjkRPbiN+zp2QRp7wlTCzbUXxZ/D4OzuQUDwBHY=
+sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE=
+sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg=
+sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU=
+sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
+sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco=
+sigs.k8s.io/structured-merge-diff/v6 v6.3.0/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE=
+sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs=
+sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4=
diff --git a/decisions/hack/boilerplate.go.txt b/decisions/hack/boilerplate.go.txt
new file mode 100644
index 00000000..0fb88f91
--- /dev/null
+++ b/decisions/hack/boilerplate.go.txt
@@ -0,0 +1,2 @@
+// Copyright 2025 SAP SE
+// SPDX-License-Identifier: Apache-2.0
\ No newline at end of file
diff --git a/decisions/internal/controller/conf.go b/decisions/internal/controller/conf.go
new file mode 100644
index 00000000..5a329b25
--- /dev/null
+++ b/decisions/internal/controller/conf.go
@@ -0,0 +1,14 @@
+// Copyright 2025 SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package controller
+
+const (
+ DefaultTTLAfterDecisionSeconds = 24 * 60 * 60 // 24 hours in seconds
+)
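+
+// An illustrative conf.json payload (mounted via the chart's ConfigMap) that
+// populates this struct through the json tag below, e.g. a one-hour TTL:
+//
+//	{"ttlAfterDecisionSeconds": 3600}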
+
+// Configuration for the decisions operator.
+type Config struct {
+ // TTL for scheduling decisions after the last decision's RequestedAt timestamp (in seconds)
+ TTLAfterDecisionSeconds int `json:"ttlAfterDecisionSeconds,omitempty"`
+}
diff --git a/decisions/internal/controller/controller.go b/decisions/internal/controller/controller.go
new file mode 100644
index 00000000..7456bcce
--- /dev/null
+++ b/decisions/internal/controller/controller.go
@@ -0,0 +1,659 @@
+// Copyright 2025 SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package controller
+
+import (
+ "context"
+ "fmt"
+ "math"
+ "sort"
+ "strings"
+ "time"
+
+ apierrors "k8s.io/apimachinery/pkg/api/errors"
+ "k8s.io/apimachinery/pkg/runtime"
+ ctrl "sigs.k8s.io/controller-runtime"
+ "sigs.k8s.io/controller-runtime/pkg/client"
+ "sigs.k8s.io/controller-runtime/pkg/controller"
+ "sigs.k8s.io/controller-runtime/pkg/event"
+ logf "sigs.k8s.io/controller-runtime/pkg/log"
+ "sigs.k8s.io/controller-runtime/pkg/predicate"
+
+ "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1"
+ decisionsv1alpha1 "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1"
+)
+
+const (
+ MinScoreValue = -999999
+
+ selectedPerfectFmt = "Selected: %s (score: %.2f), certainty: perfect, %d hosts evaluated."
+ selectedCertaintyFmt = "Selected: %s (score: %.2f), certainty: %s (gap: %.2f), %d hosts evaluated."
+ noHostsRemainingFmt = "No hosts remaining after filtering, %d hosts evaluated"
+ inputConfirmedFmt = " Input choice confirmed: %s (%.2f→%.2f, remained #1)."
+ inputFilteredFmt = " Input favored %s (score: %.2f, now filtered), final winner was #%d in input (%.2f→%.2f)."
+ inputDemotedFmt = " Input favored %s (score: %.2f, now #%d with %.2f), final winner was #%d in input (%.2f→%.2f)."
+)
+
+type certaintyLevel struct {
+ threshold float64
+ level string
+}
+
+var certaintyLevels = []certaintyLevel{
+ {0.5, "high"},
+ {0.2, "medium"},
+ {0.0, "low"},
+}
+
+func getCertaintyLevel(gap float64) string {
+ for _, cl := range certaintyLevels {
+ if gap >= cl.threshold {
+ return cl.level
+ }
+ }
+ return "low"
+}
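+
+// Illustrative calls (example values, not part of the generated code); the
+// gap is presumably the winner's score lead over the runner-up, as reported
+// in selectedCertaintyFmt:
+//
+//	getCertaintyLevel(0.7) // "high"   (gap >= 0.5)
+//	getCertaintyLevel(0.3) // "medium" (gap >= 0.2)
+//	getCertaintyLevel(0.1) // "low"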
+
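+// noDeleteEventsPredicate forwards create, update, and generic events to the
+// reconciler but filters out delete events, avoiding races with the TTL
+// controller that removes expired decisions.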
+type noDeleteEventsPredicate struct{}
+
+func (noDeleteEventsPredicate) Create(e event.CreateEvent) bool {
+ return true
+}
+
+func (noDeleteEventsPredicate) Update(e event.UpdateEvent) bool {
+ return true
+}
+
+func (noDeleteEventsPredicate) Delete(e event.DeleteEvent) bool {
+ // Ignore delete events to prevent race conditions with TTL controller
+ return false
+}
+
+func (noDeleteEventsPredicate) Generic(e event.GenericEvent) bool {
+ return true
+}
+
+type hostScore struct {
+ host string
+ score float64
+}
+
+// mapToSortedHostScores sorts hosts by score descending
+func mapToSortedHostScores(scores map[string]float64) []hostScore {
+ sorted := make([]hostScore, 0, len(scores))
+ for host, score := range scores {
+ sorted = append(sorted, hostScore{host: host, score: score})
+ }
+ sort.Slice(sorted, func(i, j int) bool {
+ return sorted[i].score > sorted[j].score
+ })
+ return sorted
+}
+
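+// findHostPosition returns the 1-based rank of targetHost in the sorted
+// slice, or -1 if the host is not present.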
+func findHostPosition(hosts []hostScore, targetHost string) int {
+ for i, hs := range hosts {
+ if hs.host == targetHost {
+ return i + 1
+ }
+ }
+ return -1
+}
+
+// SchedulingDecisionReconciler reconciles a SchedulingDecision object
+type SchedulingDecisionReconciler struct {
+ // Client for the kubernetes API.
+ client.Client
+ // Kubernetes scheme to use for the decisions.
+ Scheme *runtime.Scheme
+ // Configuration for the controller.
+ Conf Config
+}
+
+// +kubebuilder:rbac:groups=decisions.cortex,resources=schedulingdecisions,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=decisions.cortex,resources=schedulingdecisions/status,verbs=get;update;patch
+// +kubebuilder:rbac:groups=decisions.cortex,resources=schedulingdecisions/finalizers,verbs=update
+
+func (r *SchedulingDecisionReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+ _ = logf.FromContext(ctx)
+ // Fetch the decision object.
+ var res v1alpha1.SchedulingDecision
+ if err := r.Get(ctx, req.NamespacedName, &res); err != nil {
+ // Resource was deleted or doesn't exist - nothing to process
+ // This can happen when the TTL controller deletes a resource while
+ // a reconcile request is still queued for the main controller
+ return ctrl.Result{}, client.IgnoreNotFound(err)
+ }
+
+ // Validate we have at least one decision
+ if len(res.Spec.Decisions) == 0 {
+		if err := r.setErrorState(ctx, &res, fmt.Errorf("no decisions provided in spec")); err != nil {
+ return ctrl.Result{}, err
+ }
+ return ctrl.Result{}, nil
+ }
+
+ // Process each decision individually
+ results := make([]v1alpha1.SchedulingDecisionResult, 0, len(res.Spec.Decisions))
+
+ for _, decision := range res.Spec.Decisions {
+ // Validate input has at least one host for this decision
+ if err := r.validateInput(decision.Input); err != nil {
+			if err := r.setErrorState(ctx, &res, fmt.Errorf("decision %s: %w", decision.ID, err)); err != nil {
+ return ctrl.Result{}, err
+ }
+ return ctrl.Result{}, nil
+ }
+
+ // Validate that all hosts in pipeline outputs exist in input for this decision
+ if err := r.validatePipelineHosts(decision.Input, decision.Pipeline.Outputs); err != nil {
+			if err := r.setErrorState(ctx, &res, fmt.Errorf("decision %s: %w", decision.ID, err)); err != nil {
+ return ctrl.Result{}, err
+ }
+ return ctrl.Result{}, nil
+ }
+
+ finalScores, deletedHosts := r.calculateScores(decision.Input, decision.Pipeline.Outputs)
+
+ stepImpacts := r.calculateStepImpacts(decision.Input, decision.Pipeline.Outputs, finalScores)
+
+ criticalSteps, criticalStepCount := r.findCriticalSteps(decision.Input, decision.Pipeline.Outputs, finalScores)
+
+ orderedScores, description := r.generateOrderedScoresAndDescription(finalScores, decision.Input, criticalSteps, criticalStepCount, len(decision.Pipeline.Outputs), stepImpacts)
+
+ result := v1alpha1.SchedulingDecisionResult{
+ ID: decision.ID,
+ Description: description,
+ FinalScores: orderedScores,
+ DeletedHosts: deletedHosts,
+ }
+ results = append(results, result)
+ }
+
+ globalDescription := r.generateGlobalDescription(results, res.Spec.Decisions)
+
+ res.Status.State = v1alpha1.SchedulingDecisionStateResolved
+ res.Status.Error = ""
+ res.Status.DecisionCount = len(res.Spec.Decisions)
+ res.Status.GlobalDescription = globalDescription
+ res.Status.Results = results
+
+ if err := r.Status().Update(ctx, &res); err != nil {
+ // Handle the case where resource was deleted during processing
+ if client.IgnoreNotFound(err) != nil {
+ // If it's a conflict error, just log and ignore - resource was modified concurrently
+ if apierrors.IsConflict(err) {
+ log := logf.FromContext(ctx)
+ log.Info("Resource was modified during processing, ignoring conflict", "name", res.Name, "error", err.Error())
+ return ctrl.Result{}, nil
+ }
+ return ctrl.Result{}, err
+ }
+ // Resource was deleted (e.g., by TTL controller), nothing to update
+ return ctrl.Result{}, nil
+ }
+
+ log := logf.FromContext(ctx)
+ log.Info("Updated SchedulingDecision", "name", res.Name, "decisions", len(res.Spec.Decisions))
+
+ return ctrl.Result{}, nil // No need to requeue.
+}
+
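+// validateInput ensures that at least one host was provided in the input map.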
+func (r *SchedulingDecisionReconciler) validateInput(input map[string]float64) error {
+ if len(input) == 0 {
+		return fmt.Errorf("no hosts provided in input")
+ }
+ return nil
+}
+
+// validatePipelineHosts checks if all hosts in pipeline outputs exist in input
+func (r *SchedulingDecisionReconciler) validatePipelineHosts(input map[string]float64, outputs []v1alpha1.SchedulingDecisionPipelineOutputSpec) error {
+ for _, output := range outputs {
+ for hostName := range output.Activations {
+ if _, exists := input[hostName]; !exists {
+				return fmt.Errorf("host %q in pipeline output not found in input", hostName)
+ }
+ }
+ }
+ return nil
+}
+
+// setErrorState sets the error state and updates the resource status
+func (r *SchedulingDecisionReconciler) setErrorState(ctx context.Context, res *v1alpha1.SchedulingDecision, err error) error {
+ res.Status.State = v1alpha1.SchedulingDecisionStateError
+ res.Status.Error = err.Error()
+
+ log := logf.FromContext(ctx)
+ log.Error(err, "Updated SchedulingDecision with error", "name", res.Name)
+
+ return r.Status().Update(ctx, res)
+}
+
+// findWinner returns the host with the highest score and the score value
+func findWinner(scores map[string]float64) (string, float64) {
+ if len(scores) == 0 {
+ return "", MinScoreValue
+ }
+
+ winner := ""
+ maxScore := float64(MinScoreValue)
+ for host, score := range scores {
+ if score > maxScore {
+ maxScore = score
+ winner = host
+ }
+ }
+ return winner, maxScore
+}
+
+// calculateScores processes pipeline outputs and returns final scores and deleted hosts
+func (r *SchedulingDecisionReconciler) calculateScores(input map[string]float64, outputs []v1alpha1.SchedulingDecisionPipelineOutputSpec) (map[string]float64, map[string][]string) {
+ finalScores := make(map[string]float64, len(input))
+ deletedHosts := make(map[string][]string)
+
+ // Start with input values as initial scores
+ for hostName, inputValue := range input {
+ finalScores[hostName] = inputValue
+ }
+
+ // Process each pipeline step sequentially
+ for _, output := range outputs {
+ // Check which hosts will be deleted in this step
+ for hostName := range finalScores {
+ if _, exists := output.Activations[hostName]; !exists {
+ // Host not in this step's activations - will be deleted
+ deletedHosts[hostName] = append(deletedHosts[hostName], output.Step)
+ }
+ }
+
+ // Apply activations and remove hosts not in this step
+ for hostName := range finalScores {
+ if activation, exists := output.Activations[hostName]; exists {
+ // Add activation to current score
+ finalScores[hostName] = finalScores[hostName] + activation
+ } else {
+ // Host not in this step - remove it
+ delete(finalScores, hostName)
+ }
+ }
+ }
+
+ return finalScores, deletedHosts
+}
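+
+// Worked example (illustrative values): with input {a: 1.0, b: 2.0}, a step
+// "step1" with activations {a: +0.5, b: -0.5}, and a step "step2" with
+// activations {a: +0.1} only, the scores evolve 1.0/2.0 -> 1.5/1.5 -> {a: 1.6},
+// while b is removed and deletedHosts ends up as {b: ["step2"]}.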
+
+// findCriticalSteps determines which steps change the winning host using backward elimination
+func (r *SchedulingDecisionReconciler) findCriticalSteps(input map[string]float64, outputs []v1alpha1.SchedulingDecisionPipelineOutputSpec, baselineFinalScores map[string]float64) ([]string, int) {
+ if len(outputs) == 0 {
+ return []string{}, 0
+ }
+
+ // Get baseline winner
+ baselineWinner, _ := findWinner(baselineFinalScores)
+ if baselineWinner == "" {
+ return []string{}, 0
+ }
+
+ criticalSteps := make([]string, 0)
+
+ // Try removing each step one by one
+ for i, stepToRemove := range outputs {
+ // Create pipeline without this step using slice operations
+ reducedOutputs := make([]v1alpha1.SchedulingDecisionPipelineOutputSpec, 0, len(outputs)-1)
+ reducedOutputs = append(reducedOutputs, outputs[:i]...)
+ reducedOutputs = append(reducedOutputs, outputs[i+1:]...)
+
+ // Calculate scores without this step
+ reducedFinalScores, _ := r.calculateScores(input, reducedOutputs)
+
+ // Find winner without this step
+ reducedWinner, _ := findWinner(reducedFinalScores)
+
+ // If removing this step changes the winner, it's critical
+ if reducedWinner != baselineWinner {
+ criticalSteps = append(criticalSteps, stepToRemove.Step)
+ }
+ }
+
+ return criticalSteps, len(criticalSteps)
+}
+
+// StepImpact represents the impact of a single pipeline step on the winning host
+type StepImpact struct {
+ Step string
+ ScoreBefore float64
+ ScoreAfter float64
+ ScoreDelta float64
+ CompetitorsRemoved int
+ PromotedToFirst bool
+}
+
+// calculateStepImpacts tracks how each pipeline step affects the final winner
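+// For example, a filter step that leaves only the eventual winner standing is
+// reported with ScoreDelta 0.0, CompetitorsRemoved > 0, and PromotedToFirst true
+// whenever the winner was not ranked #1 before that step.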
+func (r *SchedulingDecisionReconciler) calculateStepImpacts(input map[string]float64, outputs []v1alpha1.SchedulingDecisionPipelineOutputSpec, finalScores map[string]float64) []StepImpact {
+ if len(finalScores) == 0 || len(outputs) == 0 {
+ return []StepImpact{}
+ }
+
+ // Find the final winner
+ finalWinner, _ := findWinner(finalScores)
+ if finalWinner == "" {
+ return []StepImpact{}
+ }
+
+ stepImpacts := make([]StepImpact, 0, len(outputs))
+ currentScores := make(map[string]float64)
+
+ // Start with input values as initial scores
+ for hostName, inputValue := range input {
+ currentScores[hostName] = inputValue
+ }
+
+ // Track score before first step
+ scoreBefore := currentScores[finalWinner]
+
+ // Process each pipeline step and track the winner's evolution
+ for _, output := range outputs {
+ // Count how many competitors will be removed in this step
+ competitorsRemoved := 0
+ for hostName := range currentScores {
+ if hostName != finalWinner {
+ if _, exists := output.Activations[hostName]; !exists {
+ competitorsRemoved++
+ }
+ }
+ }
+
+ // Check if winner was #1 before this step
+ wasFirst := true
+ winnerScoreBefore := currentScores[finalWinner]
+ for host, score := range currentScores {
+ if host != finalWinner && score > winnerScoreBefore {
+ wasFirst = false
+ break
+ }
+ }
+
+ // Apply activations and remove hosts not in this step
+ newScores := make(map[string]float64)
+ for hostName, score := range currentScores {
+ if activation, exists := output.Activations[hostName]; exists {
+ newScores[hostName] = score + activation
+ }
+ // Hosts not in activations are removed (don't copy to newScores)
+ }
+
+ // Get winner's score after this step
+ scoreAfter := newScores[finalWinner]
+
+ // Check if winner became #1 after this step
+ isFirstAfter := true
+ for host, score := range newScores {
+ if host != finalWinner && score > scoreAfter {
+ isFirstAfter = false
+ break
+ }
+ }
+
+ promotedToFirst := !wasFirst && isFirstAfter
+
+ stepImpacts = append(stepImpacts, StepImpact{
+ Step: output.Step,
+ ScoreBefore: scoreBefore,
+ ScoreAfter: scoreAfter,
+ ScoreDelta: scoreAfter - scoreBefore,
+ CompetitorsRemoved: competitorsRemoved,
+ PromotedToFirst: promotedToFirst,
+ })
+
+ // Update for next iteration
+ currentScores = newScores
+ scoreBefore = scoreAfter
+ }
+
+ return stepImpacts
+}
+
+// generateOrderedScoresAndDescription sorts final scores by value (highest to lowest)
+// and generates a brief description with highest host, certainty, host count, input comparison, step impacts, and critical path
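+// Example output (taken from the accompanying controller tests):
+//
+//	Selected: host1 (score: 1.50), certainty: perfect, 2 hosts evaluated.
+//	Input favored host2 (score: 2.00, now filtered), final winner was #2
+//	in input (1.00→1.50). Decision driven by 1/2 pipeline step: filter.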
+func (r *SchedulingDecisionReconciler) generateOrderedScoresAndDescription(finalScores map[string]float64, inputScores map[string]float64, criticalSteps []string, criticalStepCount int, totalSteps int, stepImpacts []StepImpact) (map[string]float64, string) {
+ totalInputHosts := len(inputScores)
+ if len(finalScores) == 0 {
+ return finalScores, fmt.Sprintf(noHostsRemainingFmt, totalInputHosts)
+ }
+
+ // Sort final scores by value (highest to lowest)
+ sortedHosts := mapToSortedHostScores(finalScores)
+
+	// Rebuild the map from the sorted slice. Note that Go maps do not guarantee
+	// iteration order; the authoritative ordering lives in sortedHosts.
+ orderedScores := make(map[string]float64)
+ for _, hs := range sortedHosts {
+ orderedScores[hs.host] = hs.score
+ }
+
+ // Sort input scores to determine input-based ranking
+ sortedInputHosts := mapToSortedHostScores(inputScores)
+
+ // Find positions and generate comparison
+ finalWinner := sortedHosts[0].host
+ inputWinner := sortedInputHosts[0].host
+ finalWinnerInputScore := inputScores[finalWinner]
+
+ // Find final winner's position in input ranking
+ finalWinnerInputPosition := findHostPosition(sortedInputHosts, finalWinner)
+
+ // Generate main description
+ var description string
+ if len(sortedHosts) == 1 {
+ description = fmt.Sprintf(selectedPerfectFmt, sortedHosts[0].host, sortedHosts[0].score, totalInputHosts)
+ } else {
+ // Calculate certainty based on gap between 1st and 2nd place
+ gap := sortedHosts[0].score - sortedHosts[1].score
+ certainty := getCertaintyLevel(gap)
+ description = fmt.Sprintf(selectedCertaintyFmt, sortedHosts[0].host, sortedHosts[0].score, certainty, gap, totalInputHosts)
+ }
+
+ // Add input vs. final comparison
+ var comparison string
+ if inputWinner == finalWinner {
+ // Input choice confirmed
+ comparison = fmt.Sprintf(inputConfirmedFmt, finalWinner, finalWinnerInputScore, sortedHosts[0].score)
+ } else {
+ // Input winner different from final winner
+ inputWinnerScore := sortedInputHosts[0].score
+
+ // Check if input winner was filtered out
+ _, inputWinnerSurvived := finalScores[inputWinner]
+ if !inputWinnerSurvived {
+ comparison = fmt.Sprintf(inputFilteredFmt, inputWinner, inputWinnerScore, finalWinnerInputPosition, finalWinnerInputScore, sortedHosts[0].score)
+ } else {
+ // Find input winner's position in final ranking
+ inputWinnerFinalPosition := findHostPosition(sortedHosts, inputWinner)
+ comparison = fmt.Sprintf(inputDemotedFmt, inputWinner, inputWinnerScore, inputWinnerFinalPosition, finalScores[inputWinner],
+ finalWinnerInputPosition, finalWinnerInputScore, sortedHosts[0].score)
+ }
+ }
+
+ // Add step impact analysis for the winner using multi-line format
+ var stepImpactInfo string
+ if len(stepImpacts) > 0 {
+ stepImpactInfo = r.formatStepImpactsMultiLine(stepImpacts)
+ }
+
+ // Add critical path information
+ var criticalPath string
+ if totalSteps > 0 {
+ if criticalStepCount == 0 {
+ criticalPath = fmt.Sprintf(" Decision driven by input only (all %d steps are non-critical).", totalSteps)
+ } else if criticalStepCount == totalSteps {
+ criticalPath = fmt.Sprintf(" Decision requires all %d pipeline steps.", totalSteps)
+ } else {
+ if criticalStepCount == 1 {
+ criticalPath = fmt.Sprintf(" Decision driven by 1/%d pipeline step: %s.", totalSteps, criticalSteps[0])
+ } else {
+ // Join critical steps with proper separators
+ var stepList string
+ if len(criticalSteps) == 2 {
+ stepList = strings.Join(criticalSteps, " and ")
+ } else {
+				// For 3+ steps: "step1, step2 and step3"
+ lastStep := criticalSteps[len(criticalSteps)-1]
+ otherSteps := criticalSteps[:len(criticalSteps)-1]
+ stepList = strings.Join(otherSteps, ", ") + " and " + lastStep
+ }
+ criticalPath = fmt.Sprintf(" Decision driven by %d/%d pipeline steps: %s.", criticalStepCount, totalSteps, stepList)
+ }
+ }
+ }
+
+ description += comparison + criticalPath + stepImpactInfo
+ return orderedScores, description
+}
+
+// formatImpactValue formats a single step impact value
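+// Examples: a promotion renders as "+2.50→#1", a plain delta as "+0.50",
+// and a zero delta that removed two competitors as "+0.00 (removed 2)".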
+func formatImpactValue(impact StepImpact) string {
+ if impact.PromotedToFirst {
+ return fmt.Sprintf("%+.2f→#1", impact.ScoreDelta)
+ }
+ if impact.ScoreDelta != 0 {
+ return fmt.Sprintf("%+.2f", impact.ScoreDelta)
+ }
+ if impact.CompetitorsRemoved > 0 {
+ return fmt.Sprintf("+0.00 (removed %d)", impact.CompetitorsRemoved)
+ }
+ return "+0.00"
+}
+
+// formatStepImpactsMultiLine formats step impacts as a bulleted list,
+// ordered by absolute score delta (highest first), with promotions winning ties.
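+// Note that it sorts the provided slice in place. The output looks like
+// " Step impacts:\n• weigher +0.50\n• filter +0.00→#1."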
+func (r *SchedulingDecisionReconciler) formatStepImpactsMultiLine(stepImpacts []StepImpact) string {
+ if len(stepImpacts) == 0 {
+ return ""
+ }
+
+ // Sort by absolute delta impact (highest first), with promotions taking priority for ties
+ sort.Slice(stepImpacts, func(i, j int) bool {
+ absI, absJ := math.Abs(stepImpacts[i].ScoreDelta), math.Abs(stepImpacts[j].ScoreDelta)
+ if absI != absJ {
+ return absI > absJ
+ }
+ if stepImpacts[i].PromotedToFirst != stepImpacts[j].PromotedToFirst {
+ return stepImpacts[i].PromotedToFirst
+ }
+ return stepImpacts[i].Step < stepImpacts[j].Step
+ })
+
+ var b strings.Builder
+ b.WriteString(" Step impacts:")
+ for _, impact := range stepImpacts {
+ fmt.Fprintf(&b, "\n• %s %s", impact.Step, formatImpactValue(impact))
+ }
+ return b.String() + "."
+}
+
+// hostSegment represents a segment in the host chain with duration and decision count
+type hostSegment struct {
+ host string
+ duration time.Duration
+ decisions int
+}
+
+// formatDuration formats a duration in a simple d/h/m format
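+// Durations are truncated, not rounded: 26h becomes "1d", 90m becomes "1h",
+// and anything under an hour is rendered in minutes, e.g. "45m".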
+func formatDuration(d time.Duration) string {
+ if d >= 24*time.Hour {
+ return fmt.Sprintf("%dd", int(d.Hours()/24))
+ }
+ if d >= time.Hour {
+ return fmt.Sprintf("%dh", int(d.Hours()))
+ }
+ return fmt.Sprintf("%dm", int(d.Minutes()))
+}
+
+// generateGlobalDescription creates a global description for decisions
+// showing the host chain with durations and detecting simple loops
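+// Example output: "chain: host1 (2h) -> host2 (3h; 2 decisions) -> host4 (0m)",
+// with the prefix "chain (loop detected):" when a host reappears later in the chain.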
+func (r *SchedulingDecisionReconciler) generateGlobalDescription(results []v1alpha1.SchedulingDecisionResult, decisions []v1alpha1.SchedulingDecisionRequest) string {
+ if len(results) == 0 {
+ return "" // No decisions to describe
+ }
+
+ // Extract host chain from winners
+ hostChain := make([]string, 0, len(results))
+ for _, result := range results {
+ winner, _ := findWinner(result.FinalScores)
+ hostChain = append(hostChain, winner)
+ }
+
+ // Build segments with durations in one pass
+ segments := make([]hostSegment, 0)
+ if len(hostChain) > 0 {
+ currentHost := hostChain[0]
+ segmentStart := 0
+
+ for i := 1; i <= len(hostChain); i++ {
+ // Check if we've reached the end or found a different host
+ if i == len(hostChain) || hostChain[i] != currentHost {
+ // Calculate duration for this segment
+ startTime := decisions[segmentStart].RequestedAt.Time
+ var endTime time.Time
+ if i == len(hostChain) {
+ // For the last segment, use the same time as start time (0 duration)
+ endTime = startTime
+ } else {
+ endTime = decisions[i].RequestedAt.Time
+ }
+
+ segments = append(segments, hostSegment{
+ host: currentHost,
+ duration: endTime.Sub(startTime),
+ decisions: i - segmentStart,
+ })
+
+ if i < len(hostChain) {
+ currentHost = hostChain[i]
+ segmentStart = i
+ }
+ }
+ }
+ }
+
+ // Build chain string with durations
+ chainParts := make([]string, 0, len(segments))
+ for _, segment := range segments {
+ part := segment.host + " (" + formatDuration(segment.duration)
+ if segment.decisions > 1 {
+ part += fmt.Sprintf("; %d decisions", segment.decisions)
+ }
+ part += ")"
+ chainParts = append(chainParts, part)
+ }
+
+ hasLoop := false
+ seenHosts := make(map[string]bool)
+	for _, segment := range segments {
+		if seenHosts[segment.host] {
+			hasLoop = true
+			break
+		}
+		seenHosts[segment.host] = true
+	}
+
+	chainStr := strings.Join(chainParts, " -> ")
+	if hasLoop {
+		return fmt.Sprintf("chain (loop detected): %s", chainStr)
+	}
+	return fmt.Sprintf("chain: %s", chainStr)
+}
+
+// SetupWithManager sets up the controller with the Manager.
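+// Only generation (spec) changes trigger reconciles, and delete events are
+// filtered out entirely, since a deleted resource has no status left to update.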
+func (r *SchedulingDecisionReconciler) SetupWithManager(mgr ctrl.Manager) error {
+ return ctrl.NewControllerManagedBy(mgr).
+ For(&decisionsv1alpha1.SchedulingDecision{}).
+ Named("schedulingdecision").
+ WithOptions(controller.Options{
+ MaxConcurrentReconciles: 1, // Default
+ }).
+ WithEventFilter(predicate.And(
+ predicate.GenerationChangedPredicate{},
+ noDeleteEventsPredicate{},
+ )).
+ Complete(r)
+}
diff --git a/decisions/internal/controller/controller_test.go b/decisions/internal/controller/controller_test.go
new file mode 100644
index 00000000..5d65a7b9
--- /dev/null
+++ b/decisions/internal/controller/controller_test.go
@@ -0,0 +1,1091 @@
+// Copyright 2025 SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package controller
+
+import (
+ "fmt"
+ "testing"
+ "time"
+
+ "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1"
+)
+
+func TestReconcile(t *testing.T) {
+ // Create test decision with pipeline outputs
+ decision := NewTestDecision("decision-1").
+ WithInput(map[string]float64{
+ "host1": 1.0,
+ "host2": 2.0,
+ }).
+ WithPipelineOutputs(
+ NewTestPipelineOutput("weigher", map[string]float64{
+ "host1": 0.5,
+ "host2": 0.5,
+ }),
+ NewTestPipelineOutput("filter", map[string]float64{
+ "host1": 0.0,
+ }),
+ ).
+ Build()
+
+ resource := NewTestSchedulingDecision("test-decision").
+ WithDecisions(decision).
+ Build()
+
+ fakeClient, _ := SetupTestEnvironment(t, resource)
+ req := CreateTestRequest("test-decision")
+
+ reconciler := CreateSchedulingReconciler(fakeClient)
+ _, err := reconciler.Reconcile(t.Context(), req)
+ if err != nil {
+ t.Fatalf("Reconcile returned an error: %v", err)
+ }
+
+ // Fetch and verify the updated resource
+ updatedResource := AssertResourceExists(t, fakeClient, "test-decision")
+ AssertResourceState(t, updatedResource, v1alpha1.SchedulingDecisionStateResolved)
+ AssertNoError(t, updatedResource)
+ AssertDecisionCount(t, updatedResource, 1)
+ AssertResultCount(t, updatedResource, 1)
+
+ result := updatedResource.Status.Results[0]
+ if result.ID != "decision-1" {
+ t.Errorf("Expected result ID 'decision-1', got '%s'", result.ID)
+ }
+
+ expectedDescription := "Selected: host1 (score: 1.50), certainty: perfect, 2 hosts evaluated. Input favored host2 (score: 2.00, now filtered), final winner was #2 in input (1.00→1.50). Decision driven by 1/2 pipeline step: filter. Step impacts:\n• weigher +0.50\n• filter +0.00→#1."
+ if result.Description != expectedDescription {
+ t.Errorf("Expected description '%s', got '%s'", expectedDescription, result.Description)
+ }
+
+ // Verify final scores calculation
+ // Expected: host1: 1.0 + 0.5 + 0.0 = 1.5, host2: removed by filter step
+ expectedFinalScores := map[string]float64{
+ "host1": 1.5,
+ }
+ AssertFinalScores(t, result, expectedFinalScores)
+
+ // Verify deleted hosts tracking
+ expectedDeletedHosts := map[string][]string{
+ "host2": {"filter"}, // host2 was deleted by the filter step
+ }
+ AssertDeletedHosts(t, result, expectedDeletedHosts)
+
+ t.Logf("Reconcile completed successfully: state=%s, finalScores=%v, deletedHosts=%v",
+ updatedResource.Status.State, result.FinalScores, result.DeletedHosts)
+}
+
+func TestReconcileEmptyInput(t *testing.T) {
+ // Create test decision with empty input
+ decision := NewTestDecision("decision-1").
+ WithInput(map[string]float64{}). // Empty input - no hosts
+ WithPipelineOutputs(
+ NewTestPipelineOutput("weigher", map[string]float64{
+ "host1": 0.5,
+ "host2": 0.5,
+ }),
+ ).
+ Build()
+
+ resource := NewTestSchedulingDecision("test-decision-empty-input").
+ WithDecisions(decision).
+ Build()
+
+ fakeClient, _ := SetupTestEnvironment(t, resource)
+ req := CreateTestRequest("test-decision-empty-input")
+
+ reconciler := CreateSchedulingReconciler(fakeClient)
+ _, err := reconciler.Reconcile(t.Context(), req)
+ if err != nil {
+ t.Fatalf("Reconcile returned an error: %v", err)
+ }
+
+ // Fetch and verify the updated resource
+ updatedResource := AssertResourceExists(t, fakeClient, "test-decision-empty-input")
+ AssertResourceState(t, updatedResource, v1alpha1.SchedulingDecisionStateError)
+ AssertResourceError(t, updatedResource, "Decision decision-1: No hosts provided in input")
+
+ t.Logf("Reconcile completed with error: state=%s, error=%s", updatedResource.Status.State, updatedResource.Status.Error)
+}
+
+func TestReconcileHostMismatch(t *testing.T) {
+ // Create test decision with host mismatch (host3 in pipeline but not in input)
+ decision := NewTestDecision("decision-1").
+ WithInput(map[string]float64{
+ "host1": 1.0,
+ "host2": 2.0,
+ }).
+ WithPipelineOutputs(
+ NewTestPipelineOutput("weigher", map[string]float64{
+ "host1": 0.5,
+ "host3": 0.3, // host3 doesn't exist in input
+ }),
+ ).
+ Build()
+
+ resource := NewTestSchedulingDecision("test-decision-host-mismatch").
+ WithDecisions(decision).
+ Build()
+
+ fakeClient, _ := SetupTestEnvironment(t, resource)
+ req := CreateTestRequest("test-decision-host-mismatch")
+
+ reconciler := CreateSchedulingReconciler(fakeClient)
+ _, err := reconciler.Reconcile(t.Context(), req)
+ if err != nil {
+ t.Fatalf("Reconcile returned an error: %v", err)
+ }
+
+ // Fetch and verify the updated resource
+ updatedResource := AssertResourceExists(t, fakeClient, "test-decision-host-mismatch")
+ AssertResourceState(t, updatedResource, v1alpha1.SchedulingDecisionStateError)
+ AssertResourceError(t, updatedResource, "Decision decision-1: Host 'host3' in pipeline output not found in input")
+
+ t.Logf("Reconcile completed with host mismatch error: state=%s, error=%s", updatedResource.Status.State, updatedResource.Status.Error)
+}
+
+func TestReconcileComplexScoring(t *testing.T) {
+ // Create test decision with complex multi-step pipeline
+ decision := NewTestDecision("decision-1").
+ WithInput(map[string]float64{
+ "host1": 1.0,
+ "host2": 2.0,
+ "host3": 3.0,
+ "host4": 4.0,
+ }).
+ WithPipelineOutputs(
+ NewTestPipelineOutput("weigher1", map[string]float64{
+ "host1": 0.5,
+ "host2": 1.0,
+ "host3": -0.5,
+ "host4": 2.0,
+ }),
+ NewTestPipelineOutput("filter1", map[string]float64{
+ "host1": 0.2,
+ "host3": 0.1, // host2 and host4 removed by this step
+ }),
+ NewTestPipelineOutput("weigher2", map[string]float64{
+ "host1": -0.3, // host3 removed by this step
+ }),
+ ).
+ Build()
+
+ resource := NewTestSchedulingDecision("test-decision-complex").
+ WithDecisions(decision).
+ Build()
+
+ fakeClient, _ := SetupTestEnvironment(t, resource)
+ req := CreateTestRequest("test-decision-complex")
+
+ reconciler := CreateSchedulingReconciler(fakeClient)
+ _, err := reconciler.Reconcile(t.Context(), req)
+ if err != nil {
+ t.Fatalf("Reconcile returned an error: %v", err)
+ }
+
+ // Fetch and verify the updated resource
+ updatedResource := AssertResourceExists(t, fakeClient, "test-decision-complex")
+ AssertResourceState(t, updatedResource, v1alpha1.SchedulingDecisionStateResolved)
+ AssertResultCount(t, updatedResource, 1)
+
+ result := updatedResource.Status.Results[0]
+ if result.ID != "decision-1" {
+ t.Errorf("Expected result ID 'decision-1', got '%s'", result.ID)
+ }
+
+ // Verify final scores calculation
+ // Expected: host1: 1.0 + 0.5 + 0.2 + (-0.3) = 1.4
+ // host2: removed by filter1, host3: removed by weigher2, host4: removed by filter1
+ expectedFinalScores := map[string]float64{
+ "host1": 1.4,
+ }
+ AssertFinalScores(t, result, expectedFinalScores)
+
+ // Verify deleted hosts tracking
+ expectedDeletedHosts := map[string][]string{
+ "host2": {"filter1"}, // host2 deleted by filter1
+ "host4": {"filter1"}, // host4 deleted by filter1
+ "host3": {"weigher2"}, // host3 deleted by weigher2
+ }
+ AssertDeletedHosts(t, result, expectedDeletedHosts)
+
+ t.Logf("Complex scoring completed: finalScores=%v, deletedHosts=%v",
+ result.FinalScores, result.DeletedHosts)
+}
+
+func TestReconcileMultipleDeletionSteps(t *testing.T) {
+ // Create test decision with multiple filter steps that remove all hosts
+ decision := NewTestDecision("decision-1").
+ WithInput(map[string]float64{
+ "host1": 1.0,
+ "host2": 2.0,
+ "host3": 3.0,
+ }).
+ WithPipelineOutputs(
+ NewTestPipelineOutput("weigher1", map[string]float64{
+ "host1": 0.5,
+ "host2": 1.0,
+ "host3": -0.5,
+ }),
+ NewTestPipelineOutput("filter1", map[string]float64{
+ "host1": 0.2,
+ // host2 and host3 removed by this step
+ }),
+ NewTestPipelineOutput("filter2", map[string]float64{
+ // host1 removed by this step
+ // host2 and host3 would be removed again, but they're already gone
+ }),
+ ).
+ Build()
+
+ resource := NewTestSchedulingDecision("test-decision-multiple-deletions").
+ WithDecisions(decision).
+ Build()
+
+ fakeClient, _ := SetupTestEnvironment(t, resource)
+ req := CreateTestRequest("test-decision-multiple-deletions")
+
+ reconciler := CreateSchedulingReconciler(fakeClient)
+ _, err := reconciler.Reconcile(t.Context(), req)
+ if err != nil {
+ t.Fatalf("Reconcile returned an error: %v", err)
+ }
+
+ // Fetch and verify the updated resource
+ updatedResource := AssertResourceExists(t, fakeClient, "test-decision-multiple-deletions")
+ AssertResourceState(t, updatedResource, v1alpha1.SchedulingDecisionStateResolved)
+ AssertResultCount(t, updatedResource, 1)
+
+ result := updatedResource.Status.Results[0]
+ if result.ID != "decision-1" {
+ t.Errorf("Expected result ID 'decision-1', got '%s'", result.ID)
+ }
+
+ // Verify final scores calculation - all hosts should be removed, no final scores
+ expectedFinalScores := map[string]float64{}
+ AssertFinalScores(t, result, expectedFinalScores)
+
+ // Verify deleted hosts tracking
+ // host2 and host3 deleted by filter1, host1 deleted by filter2
+ expectedDeletedHosts := map[string][]string{
+ "host2": {"filter1"}, // host2 deleted by filter1
+ "host3": {"filter1"}, // host3 deleted by filter1
+ "host1": {"filter2"}, // host1 deleted by filter2
+ }
+ AssertDeletedHosts(t, result, expectedDeletedHosts)
+
+ t.Logf("Multiple deletion test completed: finalScores=%v, deletedHosts=%v",
+ result.FinalScores, result.DeletedHosts)
+}
+
+func TestReconcileCertaintyLevels(t *testing.T) {
+ tests := []struct {
+ name string
+ input map[string]float64
+ activations map[string]float64
+ expectedWinner string
+ expectedCertainty string
+ }{
+ {
+ name: "high-certainty",
+ input: map[string]float64{
+ "host1": 1.0,
+ "host2": 1.0,
+ },
+ activations: map[string]float64{
+ "host1": 1.0, // host1: 2.0, host2: 1.0, gap = 1.0 (high)
+ "host2": 0.0,
+ },
+ expectedWinner: "host1",
+ expectedCertainty: "high",
+ },
+ {
+ name: "medium-certainty",
+ input: map[string]float64{
+ "host1": 1.0,
+ "host2": 1.0,
+ },
+ activations: map[string]float64{
+ "host1": 0.3, // host1: 1.3, host2: 1.0, gap = 0.3 (medium)
+ "host2": 0.0,
+ },
+ expectedWinner: "host1",
+ expectedCertainty: "medium",
+ },
+ {
+ name: "low-certainty",
+ input: map[string]float64{
+ "host1": 1.0,
+ "host2": 1.0,
+ },
+ activations: map[string]float64{
+ "host1": 0.1, // host1: 1.1, host2: 1.0, gap = 0.1 (low)
+ "host2": 0.0,
+ },
+ expectedWinner: "host1",
+ expectedCertainty: "low",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ // Create test decision with specific activations to test certainty levels
+ decision := NewTestDecision("decision-1").
+ WithInput(tt.input).
+ WithPipelineOutputs(
+ NewTestPipelineOutput("weigher", tt.activations),
+ ).
+ Build()
+
+ resource := NewTestSchedulingDecision("test-certainty-" + tt.name).
+ WithDecisions(decision).
+ Build()
+
+ fakeClient, _ := SetupTestEnvironment(t, resource)
+ req := CreateTestRequest("test-certainty-" + tt.name)
+
+ reconciler := CreateSchedulingReconciler(fakeClient)
+ _, err := reconciler.Reconcile(t.Context(), req)
+ if err != nil {
+ t.Fatalf("Reconcile returned an error: %v", err)
+ }
+
+ // Fetch and verify the updated resource
+ updatedResource := AssertResourceExists(t, fakeClient, "test-certainty-"+tt.name)
+ AssertResultCount(t, updatedResource, 1)
+
+ result := updatedResource.Status.Results[0]
+ if result.ID != "decision-1" {
+ t.Errorf("Expected result ID 'decision-1', got '%s'", result.ID)
+ }
+
+ // Verify the description contains the expected winner and certainty
+ AssertDescriptionContains(t, result.Description,
+ "Selected: "+tt.expectedWinner,
+ "certainty: "+tt.expectedCertainty,
+ )
+
+ t.Logf("Certainty test %s completed: %s", tt.name, result.Description)
+ })
+ }
+}
+
+func TestReconcileNoHostsRemaining(t *testing.T) {
+ // Create test decision where all hosts are filtered out
+ decision := NewTestDecision("decision-1").
+ WithInput(map[string]float64{
+ "host1": 1.0,
+ "host2": 2.0,
+ }).
+ WithPipelineOutputs(
+ NewTestPipelineOutput("filter-all", map[string]float64{
+ // No hosts in activations - all will be filtered out
+ }),
+ ).
+ Build()
+
+ resource := NewTestSchedulingDecision("test-no-hosts-remaining").
+ WithDecisions(decision).
+ Build()
+
+ fakeClient, _ := SetupTestEnvironment(t, resource)
+ req := CreateTestRequest("test-no-hosts-remaining")
+
+ reconciler := CreateSchedulingReconciler(fakeClient)
+ _, err := reconciler.Reconcile(t.Context(), req)
+ if err != nil {
+ t.Fatalf("Reconcile returned an error: %v", err)
+ }
+
+ // Fetch and verify the updated resource
+ updatedResource := AssertResourceExists(t, fakeClient, "test-no-hosts-remaining")
+ AssertResourceState(t, updatedResource, v1alpha1.SchedulingDecisionStateResolved)
+ AssertResultCount(t, updatedResource, 1)
+
+ result := updatedResource.Status.Results[0]
+ if result.ID != "decision-1" {
+ t.Errorf("Expected result ID 'decision-1', got '%s'", result.ID)
+ }
+
+ // Verify no final scores since all hosts were filtered out
+ expectedFinalScores := map[string]float64{}
+ AssertFinalScores(t, result, expectedFinalScores)
+
+ expectedDescription := "No hosts remaining after filtering, 2 hosts evaluated"
+ if result.Description != expectedDescription {
+ t.Errorf("Expected description '%s', got '%s'", expectedDescription, result.Description)
+ }
+
+ t.Logf("No hosts remaining test completed: %s", result.Description)
+}
+
+func TestReconcileInputVsFinalComparison(t *testing.T) {
+ tests := []struct {
+ name string
+ input map[string]float64
+ activations map[string]float64
+ expectedDescContains []string
+ }{
+ {
+ name: "input-choice-confirmed",
+ input: map[string]float64{
+ "host1": 3.0, // highest in input
+ "host2": 2.0,
+ "host3": 1.0,
+ },
+ activations: map[string]float64{
+ "host1": 0.5, "host2": 0.3, "host3": 0.1, // host1 stays winner
+ },
+ expectedDescContains: []string{
+ "Selected: host1",
+ "Input choice confirmed: host1 (3.00→3.50, remained #1)",
+ },
+ },
+ {
+ name: "input-winner-filtered",
+ input: map[string]float64{
+ "host1": 1.0,
+ "host2": 3.0, // highest in input
+ "host3": 2.0,
+ },
+ activations: map[string]float64{
+ "host1": 0.5, "host3": 0.3, // host2 filtered out, host3 becomes winner
+ },
+ expectedDescContains: []string{
+ "Selected: host3",
+ "Input favored host2 (score: 3.00, now filtered)",
+ "final winner was #2 in input (2.00→2.30)",
+ },
+ },
+ {
+ name: "input-winner-demoted",
+ input: map[string]float64{
+ "host1": 1.0,
+ "host2": 3.0, // highest in input
+ "host3": 2.0,
+ },
+ activations: map[string]float64{
+ "host1": 2.5, "host2": -0.5, "host3": 0.8, // host1 becomes winner, host2 demoted to #3
+ },
+ expectedDescContains: []string{
+ "Selected: host1",
+ "Input favored host2 (score: 3.00, now #3 with 2.50)",
+ "final winner was #3 in input (1.00→3.50)",
+ },
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ // Create test decision to compare input vs final rankings
+ decision := NewTestDecision("decision-1").
+ WithInput(tt.input).
+ WithPipelineOutputs(
+ NewTestPipelineOutput("weigher", tt.activations),
+ ).
+ Build()
+
+ resource := NewTestSchedulingDecision("test-input-vs-final-" + tt.name).
+ WithDecisions(decision).
+ Build()
+
+ fakeClient, _ := SetupTestEnvironment(t, resource)
+ req := CreateTestRequest("test-input-vs-final-" + tt.name)
+
+ reconciler := CreateSchedulingReconciler(fakeClient)
+ _, err := reconciler.Reconcile(t.Context(), req)
+ if err != nil {
+ t.Fatalf("Reconcile returned an error: %v", err)
+ }
+
+ // Fetch and verify the updated resource
+ updatedResource := AssertResourceExists(t, fakeClient, "test-input-vs-final-"+tt.name)
+ AssertResultCount(t, updatedResource, 1)
+
+ result := updatedResource.Status.Results[0]
+ if result.ID != "decision-1" {
+ t.Errorf("Expected result ID 'decision-1', got '%s'", result.ID)
+ }
+
+ // Verify the description contains expected elements
+ AssertDescriptionContains(t, result.Description, tt.expectedDescContains...)
+
+ t.Logf("Input vs Final test %s completed: %s", tt.name, result.Description)
+ })
+ }
+}
+
+func TestReconcileCriticalStepElimination(t *testing.T) {
+ tests := []struct {
+ name string
+ input map[string]float64
+ pipelineOutputs []v1alpha1.SchedulingDecisionPipelineOutputSpec
+ expectedCriticalMessage string
+ }{
+ {
+ name: "single-critical-step",
+ input: map[string]float64{
+ "host1": 2.0, // Would win without pipeline
+ "host2": 1.0,
+ "host3": 1.5,
+ },
+ pipelineOutputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{
+ {
+ Step: "non-critical-weigher",
+ Activations: map[string]float64{
+ "host1": 0.1, // Small changes don't affect winner
+ "host2": 0.1,
+ "host3": 0.1,
+ },
+ },
+ {
+ Step: "critical-filter",
+ Activations: map[string]float64{
+ "host2": 0.0, // host1 and host3 filtered out, host2 becomes winner
+ "host3": 0.0,
+ },
+ },
+ },
+ expectedCriticalMessage: "Decision driven by 1/2 pipeline step: critical-filter.",
+ },
+ {
+ name: "multiple-critical-steps",
+ input: map[string]float64{
+ "host1": 1.0,
+ "host2": 3.0, // Strong initial winner
+ "host3": 2.0,
+ },
+ pipelineOutputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{
+ {
+ Step: "critical-weigher1",
+ Activations: map[string]float64{
+ "host1": 1.0, // host1: 2.0, host2: 2.5, host3: 2.5 (ties host2 and host3)
+ "host2": -0.5,
+ "host3": 0.5,
+ },
+ },
+ {
+ Step: "critical-weigher2",
+ Activations: map[string]float64{
+ "host1": 1.0, // host1: 3.0, host2: 2.5, host3: 2.5 (host1 becomes winner)
+ "host2": 0.0,
+ "host3": 0.0,
+ },
+ },
+ },
+ expectedCriticalMessage: "Decision requires all 2 pipeline steps.",
+ },
+ {
+ name: "all-non-critical",
+ input: map[string]float64{
+ "host1": 3.0, // Clear winner from input
+ "host2": 1.0,
+ "host3": 2.0,
+ },
+ pipelineOutputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{
+ {
+ Step: "non-critical-weigher1",
+ Activations: map[string]float64{
+ "host1": 0.1, // Small changes don't change winner
+ "host2": 0.1,
+ "host3": 0.1,
+ },
+ },
+ {
+ Step: "non-critical-weigher2",
+ Activations: map[string]float64{
+ "host1": 0.2,
+ "host2": 0.0,
+ "host3": 0.1,
+ },
+ },
+ },
+ expectedCriticalMessage: "Decision driven by input only (all 2 steps are non-critical).",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ // Create test decision with multiple pipeline steps to test critical step analysis
+ decision := NewTestDecision("decision-1").
+ WithInput(tt.input).
+ WithPipelineOutputs(tt.pipelineOutputs...).
+ Build()
+
+ resource := NewTestSchedulingDecision("test-critical-steps-" + tt.name).
+ WithDecisions(decision).
+ Build()
+
+ fakeClient, _ := SetupTestEnvironment(t, resource)
+ req := CreateTestRequest("test-critical-steps-" + tt.name)
+
+ reconciler := CreateSchedulingReconciler(fakeClient)
+ _, err := reconciler.Reconcile(t.Context(), req)
+ if err != nil {
+ t.Fatalf("Reconcile returned an error: %v", err)
+ }
+
+ // Fetch and verify the updated resource
+ updatedResource := AssertResourceExists(t, fakeClient, "test-critical-steps-"+tt.name)
+ AssertResultCount(t, updatedResource, 1)
+
+ result := updatedResource.Status.Results[0]
+ if result.ID != "decision-1" {
+ t.Errorf("Expected result ID 'decision-1', got '%s'", result.ID)
+ }
+
+ // Verify the description contains the expected critical step message
+ AssertDescriptionContains(t, result.Description, tt.expectedCriticalMessage)
+
+ t.Logf("Critical step test %s completed: %s", tt.name, result.Description)
+ })
+ }
+}
+
+func TestReconcileGlobalDescription(t *testing.T) {
+ tests := []struct {
+ name string
+ decisions []v1alpha1.SchedulingDecisionRequest
+ expectedGlobalDescription string
+ }{
+ {
+ name: "single-decision-with-global",
+ decisions: []v1alpha1.SchedulingDecisionRequest{
+ NewTestDecision("decision-1").
+ WithInput(map[string]float64{"host1": 1.0, "host2": 2.0}).
+ WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host1": 1.5, "host2": 0.0})).
+ Build(),
+ },
+ expectedGlobalDescription: "chain: host1 (0m)", // Single decision shows chain with 0m duration - host1 wins with 2.5 vs host2 with 2.0
+ },
+ {
+ name: "simple-chain-no-loop",
+ decisions: []v1alpha1.SchedulingDecisionRequest{
+ NewTestDecision("decision-1").
+ WithRequestedAt(time.Now().Add(-5 * time.Hour)).
+ WithInput(map[string]float64{"host1": 1.0, "host2": 2.0}).
+ WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host1": 2.0, "host2": 0.0})).
+ Build(),
+ NewTestDecision("decision-2").
+ WithRequestedAt(time.Now().Add(-3 * time.Hour)).
+ WithInput(map[string]float64{"host2": 1.0, "host3": 2.0}).
+ WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host2": 1.5, "host3": 0.0})).
+ Build(),
+ NewTestDecision("decision-3").
+ WithRequestedAt(time.Now().Add(-1 * time.Hour)).
+ WithInput(map[string]float64{"host2": 1.0, "host3": 2.0}).
+ WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host2": 1.5, "host3": 0.0})).
+ Build(),
+ NewTestDecision("decision-4").
+ WithRequestedAt(time.Now()).
+ WithInput(map[string]float64{"host3": 1.0, "host4": 2.0}).
+ WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host3": 0.0, "host4": 1.0})).
+ Build(),
+ },
+ expectedGlobalDescription: "chain: host1 (2h) -> host2 (3h; 2 decisions) -> host4 (0m)",
+ },
+ {
+ name: "chain-with-loop",
+ decisions: []v1alpha1.SchedulingDecisionRequest{
+ NewTestDecision("decision-1").
+ WithRequestedAt(time.Now().Add(-5 * time.Hour)).
+ WithInput(map[string]float64{"host1": 1.0, "host2": 2.0}).
+ WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host1": 2.0, "host2": 0.0})).
+ Build(),
+ NewTestDecision("decision-2").
+ WithRequestedAt(time.Now().Add(-2 * time.Hour)).
+ WithInput(map[string]float64{"host1": 1.0, "host2": 2.0}).
+ WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host1": 0.0, "host2": 1.0})).
+ Build(),
+ NewTestDecision("decision-3").
+ WithRequestedAt(time.Now().Add(-1 * time.Hour)).
+ WithInput(map[string]float64{"host1": 1.0, "host2": 2.0}).
+ WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host1": 2.0, "host2": 0.0})).
+ Build(),
+ NewTestDecision("decision-4").
+ WithRequestedAt(time.Now()).
+ WithInput(map[string]float64{"host3": 1.0}).
+ WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host3": 0.0})).
+ Build(),
+ },
+ expectedGlobalDescription: "chain (loop detected): host1 (3h) -> host2 (1h) -> host1 (1h) -> host3 (0m)",
+ },
+ {
+ name: "same-host-all-decisions-no-loop",
+ decisions: []v1alpha1.SchedulingDecisionRequest{
+ NewTestDecision("decision-1").
+ WithRequestedAt(time.Now().Add(-2 * time.Hour)).
+ WithInput(map[string]float64{"host1": 2.0, "host2": 1.0}).
+ WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host1": 1.0, "host2": 0.0})).
+ Build(),
+ NewTestDecision("decision-2").
+ WithRequestedAt(time.Now()).
+ WithInput(map[string]float64{"host1": 2.0, "host3": 1.0}).
+ WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host1": 1.0, "host3": 0.0})).
+ Build(),
+ },
+ expectedGlobalDescription: "chain: host1 (0m; 2 decisions)", // Last segment always shows 0m duration
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ resource := NewTestSchedulingDecision("test-global-" + tt.name).
+ WithDecisions(tt.decisions...).
+ Build()
+
+ fakeClient, _ := SetupTestEnvironment(t, resource)
+ req := CreateTestRequest("test-global-" + tt.name)
+
+ reconciler := CreateSchedulingReconciler(fakeClient)
+ _, err := reconciler.Reconcile(t.Context(), req)
+ if err != nil {
+ t.Fatalf("Reconcile returned an error: %v", err)
+ }
+
+ // Fetch and verify the updated resource
+ updatedResource := AssertResourceExists(t, fakeClient, "test-global-"+tt.name)
+ AssertResourceState(t, updatedResource, v1alpha1.SchedulingDecisionStateResolved)
+ AssertDecisionCount(t, updatedResource, len(tt.decisions))
+
+ // Verify global description
+ if updatedResource.Status.GlobalDescription != tt.expectedGlobalDescription {
+ t.Errorf("Expected global description '%s', got '%s'",
+ tt.expectedGlobalDescription, updatedResource.Status.GlobalDescription)
+ }
+
+ t.Logf("Global description test %s completed: '%s'", tt.name, updatedResource.Status.GlobalDescription)
+ })
+ }
+}
+
+// TestReconcileEmptyDecisionsList tests the case where no decisions are provided
+func TestReconcileEmptyDecisionsList(t *testing.T) {
+ resource := NewTestSchedulingDecision("test-empty-decisions").
+ WithDecisions(). // No decisions provided
+ Build()
+
+ fakeClient, _ := SetupTestEnvironment(t, resource)
+ req := CreateTestRequest("test-empty-decisions")
+
+ reconciler := CreateSchedulingReconciler(fakeClient)
+ _, err := reconciler.Reconcile(t.Context(), req)
+ if err != nil {
+ t.Fatalf("Reconcile returned an error: %v", err)
+ }
+
+ // Fetch and verify the updated resource
+ updatedResource := AssertResourceExists(t, fakeClient, "test-empty-decisions")
+ AssertResourceState(t, updatedResource, v1alpha1.SchedulingDecisionStateError)
+ AssertResourceError(t, updatedResource, "No decisions provided in spec")
+
+ t.Logf("Empty decisions test completed: state=%s, error=%s", updatedResource.Status.State, updatedResource.Status.Error)
+}
+
+// TestReconcileResourceNotFound tests the case where the resource is deleted during reconciliation
+func TestReconcileResourceNotFound(t *testing.T) {
+ fakeClient, _ := SetupTestEnvironment(t) // No resource created
+ req := CreateTestRequest("non-existent-resource")
+
+ reconciler := CreateSchedulingReconciler(fakeClient)
+ _, err := reconciler.Reconcile(t.Context(), req)
+
+ // Should gracefully handle when resource is not found (no error)
+ // This can happen when TTL controller deletes a resource while main controller has queued reconcile request
+ if err != nil {
+ t.Fatalf("Expected no error when resource not found (should be handled gracefully), got: %v", err)
+ }
+
+ t.Logf("Resource not found test completed: gracefully handled with no error")
+}
+
+// TestUtilityFunctions tests the standalone utility functions
+func TestUtilityFunctions(t *testing.T) {
+ t.Run("findWinner", func(t *testing.T) {
+ tests := []struct {
+ name string
+ scores map[string]float64
+ expectedWinner string
+ expectedScore float64
+ }{
+ {
+ name: "empty-map",
+ scores: map[string]float64{},
+ expectedWinner: "",
+ expectedScore: MinScoreValue,
+ },
+ {
+ name: "single-host",
+ scores: map[string]float64{"host1": 5.0},
+ expectedWinner: "host1",
+ expectedScore: 5.0,
+ },
+ {
+ name: "clear-winner",
+ scores: map[string]float64{"host1": 3.0, "host2": 1.0, "host3": 2.0},
+ expectedWinner: "host1",
+ expectedScore: 3.0,
+ },
+ {
+ name: "tied-scores",
+ scores: map[string]float64{"host1": 2.0, "host2": 2.0},
+ expectedWinner: "", // Don't check specific winner for tied scores (map iteration order is not deterministic)
+ expectedScore: 2.0,
+ },
+ {
+ name: "negative-scores",
+ scores: map[string]float64{"host1": -1.0, "host2": -2.0},
+ expectedWinner: "host1",
+ expectedScore: -1.0,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ winner, score := findWinner(tt.scores)
+ if tt.expectedWinner != "" && winner != tt.expectedWinner {
+ t.Errorf("Expected winner '%s', got '%s'", tt.expectedWinner, winner)
+ }
+ if score != tt.expectedScore {
+ t.Errorf("Expected score %f, got %f", tt.expectedScore, score)
+ }
+ // For tied scores, just verify we got one of the tied hosts
+ if tt.name == "tied-scores" {
+ if winner != "host1" && winner != "host2" {
+ t.Errorf("Expected winner to be either 'host1' or 'host2', got '%s'", winner)
+ }
+ }
+ })
+ }
+ })
+
+ t.Run("mapToSortedHostScores", func(t *testing.T) {
+ scores := map[string]float64{
+ "host1": 1.0,
+ "host2": 3.0,
+ "host3": 2.0,
+ }
+ sorted := mapToSortedHostScores(scores)
+
+ if len(sorted) != 3 {
+ t.Errorf("Expected 3 sorted hosts, got %d", len(sorted))
+ }
+
+ // Should be sorted by score descending
+ if sorted[0].host != "host2" || sorted[0].score != 3.0 {
+ t.Errorf("Expected first host to be host2 with score 3.0, got %s with %f", sorted[0].host, sorted[0].score)
+ }
+ if sorted[1].host != "host3" || sorted[1].score != 2.0 {
+ t.Errorf("Expected second host to be host3 with score 2.0, got %s with %f", sorted[1].host, sorted[1].score)
+ }
+ if sorted[2].host != "host1" || sorted[2].score != 1.0 {
+ t.Errorf("Expected third host to be host1 with score 1.0, got %s with %f", sorted[2].host, sorted[2].score)
+ }
+ })
+
+ t.Run("findHostPosition", func(t *testing.T) {
+ hosts := []hostScore{
+ {host: "host2", score: 3.0},
+ {host: "host3", score: 2.0},
+ {host: "host1", score: 1.0},
+ }
+
+ tests := []struct {
+ targetHost string
+ expectedPosition int
+ }{
+ {"host2", 1}, // First position
+ {"host3", 2}, // Second position
+ {"host1", 3}, // Third position
+ {"host4", -1}, // Not found
+ }
+
+ for _, tt := range tests {
+ position := findHostPosition(hosts, tt.targetHost)
+ if position != tt.expectedPosition {
+ t.Errorf("Expected position %d for host %s, got %d", tt.expectedPosition, tt.targetHost, position)
+ }
+ }
+ })
+
+ t.Run("getCertaintyLevel", func(t *testing.T) {
+ tests := []struct {
+ gap float64
+ expectedCertainty string
+ }{
+ {1.0, "high"}, // >= 0.5
+ {0.5, "high"}, // exactly 0.5
+ {0.3, "medium"}, // >= 0.2, < 0.5
+ {0.2, "medium"}, // exactly 0.2
+ {0.1, "low"}, // >= 0.0, < 0.2
+ {0.0, "low"}, // exactly 0.0
+ {-0.1, "low"}, // < 0.0
+ }
+
+ for _, tt := range tests {
+ certainty := getCertaintyLevel(tt.gap)
+ if certainty != tt.expectedCertainty {
+ t.Errorf("Expected certainty '%s' for gap %f, got '%s'", tt.expectedCertainty, tt.gap, certainty)
+ }
+ }
+ })
+}
+
+// TestStepImpactAnalysis tests the step impact calculation logic
+func TestStepImpactAnalysis(t *testing.T) {
+ reconciler := &SchedulingDecisionReconciler{}
+
+ t.Run("promotion-scenarios", func(t *testing.T) {
+ input := map[string]float64{
+ "host1": 1.0, // Will become winner
+ "host2": 3.0, // Initial winner
+ "host3": 2.0,
+ }
+
+ outputs := []v1alpha1.SchedulingDecisionPipelineOutputSpec{
+ {
+ Step: "promotion-step",
+ Activations: map[string]float64{
+ "host1": 2.5, // host1: 3.5 (becomes winner)
+ "host2": -0.5, // host2: 2.5 (demoted)
+ "host3": 0.0, // host3: 2.0
+ },
+ },
+ }
+
+ finalScores := map[string]float64{
+ "host1": 3.5,
+ "host2": 2.5,
+ "host3": 2.0,
+ }
+
+ impacts := reconciler.calculateStepImpacts(input, outputs, finalScores)
+
+ if len(impacts) != 1 {
+ t.Fatalf("Expected 1 step impact, got %d", len(impacts))
+ }
+
+ impact := impacts[0]
+ if impact.Step != "promotion-step" {
+ t.Errorf("Expected step 'promotion-step', got '%s'", impact.Step)
+ }
+ if !impact.PromotedToFirst {
+ t.Errorf("Expected PromotedToFirst to be true")
+ }
+ if impact.ScoreDelta != 2.5 {
+ t.Errorf("Expected ScoreDelta 2.5, got %f", impact.ScoreDelta)
+ }
+ if impact.CompetitorsRemoved != 0 {
+ t.Errorf("Expected CompetitorsRemoved 0, got %d", impact.CompetitorsRemoved)
+ }
+ })
+
+ t.Run("competitor-removal", func(t *testing.T) {
+ input := map[string]float64{
+ "host1": 1.0, // Will become winner after competitors removed
+ "host2": 3.0, // Initial winner, will be removed
+ "host3": 2.0, // Will be removed
+ }
+
+ outputs := []v1alpha1.SchedulingDecisionPipelineOutputSpec{
+ {
+ Step: "filter-step",
+ Activations: map[string]float64{
+ "host1": 0.0, // Only host1 survives
+ },
+ },
+ }
+
+ finalScores := map[string]float64{
+ "host1": 1.0,
+ }
+
+ impacts := reconciler.calculateStepImpacts(input, outputs, finalScores)
+
+ if len(impacts) != 1 {
+ t.Fatalf("Expected 1 step impact, got %d", len(impacts))
+ }
+
+ impact := impacts[0]
+ if impact.CompetitorsRemoved != 2 {
+ t.Errorf("Expected CompetitorsRemoved 2, got %d", impact.CompetitorsRemoved)
+ }
+ if !impact.PromotedToFirst {
+ t.Errorf("Expected PromotedToFirst to be true (host1 was not #1 before, became #1 after competitors removed)")
+ }
+ if impact.ScoreDelta != 0.0 {
+ t.Errorf("Expected ScoreDelta 0.0, got %f", impact.ScoreDelta)
+ }
+ })
+
+ t.Run("empty-inputs", func(t *testing.T) {
+ // Test with empty final scores
+ impacts := reconciler.calculateStepImpacts(map[string]float64{}, []v1alpha1.SchedulingDecisionPipelineOutputSpec{}, map[string]float64{})
+ if len(impacts) != 0 {
+ t.Errorf("Expected 0 impacts for empty inputs, got %d", len(impacts))
+ }
+
+ // Test with no outputs
+ impacts = reconciler.calculateStepImpacts(map[string]float64{"host1": 1.0}, []v1alpha1.SchedulingDecisionPipelineOutputSpec{}, map[string]float64{"host1": 1.0})
+ if len(impacts) != 0 {
+ t.Errorf("Expected 0 impacts for no outputs, got %d", len(impacts))
+ }
+ })
+}
+
+// TestLargeDatasetPerformance tests the controller with larger datasets
+func TestLargeDatasetPerformance(t *testing.T) {
+ // Create a decision with many hosts
+ input := make(map[string]float64)
+ activations := make(map[string]float64)
+
+ for i := 0; i < 100; i++ {
+ hostName := fmt.Sprintf("host%d", i)
+ input[hostName] = float64(i)
+ activations[hostName] = float64(i % 10) // Vary activations
+ }
+
+ decision := NewTestDecision("large-decision").
+ WithInput(input).
+ WithPipelineOutputs(
+ NewTestPipelineOutput("weigher1", activations),
+ NewTestPipelineOutput("weigher2", activations),
+ NewTestPipelineOutput("weigher3", activations),
+ ).
+ Build()
+
+ resource := NewTestSchedulingDecision("test-large-dataset").
+ WithDecisions(decision).
+ Build()
+
+ fakeClient, _ := SetupTestEnvironment(t, resource)
+ req := CreateTestRequest("test-large-dataset")
+
+ reconciler := CreateSchedulingReconciler(fakeClient)
+
+ start := time.Now()
+ _, err := reconciler.Reconcile(t.Context(), req)
+ duration := time.Since(start)
+
+ if err != nil {
+ t.Fatalf("Reconcile returned an error: %v", err)
+ }
+
+ // Verify the result
+ updatedResource := AssertResourceExists(t, fakeClient, "test-large-dataset")
+ AssertResourceState(t, updatedResource, v1alpha1.SchedulingDecisionStateResolved)
+ AssertResultCount(t, updatedResource, 1)
+
+ result := updatedResource.Status.Results[0]
+ if len(result.FinalScores) != 100 {
+ t.Errorf("Expected 100 final scores, got %d", len(result.FinalScores))
+ }
+
+ t.Logf("Large dataset test completed in %v with %d hosts", duration, len(result.FinalScores))
+
+ // Performance check - should complete within reasonable time
+ if duration > 5*time.Second {
+ t.Errorf("Large dataset processing took too long: %v", duration)
+ }
+}
diff --git a/decisions/internal/controller/test_helpers.go b/decisions/internal/controller/test_helpers.go
new file mode 100644
index 00000000..c61ef73a
--- /dev/null
+++ b/decisions/internal/controller/test_helpers.go
@@ -0,0 +1,325 @@
+// Copyright 2025 SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package controller
+
+import (
+ "strings"
+ "testing"
+ "time"
+
+ "k8s.io/apimachinery/pkg/api/resource"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/runtime"
+ ctrl "sigs.k8s.io/controller-runtime"
+ "sigs.k8s.io/controller-runtime/pkg/client"
+ "sigs.k8s.io/controller-runtime/pkg/client/fake"
+
+ decisionsv1alpha1 "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1"
+)
+
+// Test constants to reduce magic numbers
+const (
+ DefaultTestTTL = 2 * time.Hour
+ DefaultTestAge = 1 * time.Hour
+ OldTestAge = 3 * time.Hour
+ TestTolerance = 1 * time.Minute
+ DefaultTestVCPUs = 1
+ DefaultTestRAM = 2048
+ DefaultTestDisk = 10
+)
+
+// TestDecisionBuilder helps build SchedulingDecisionRequest objects for tests
+type TestDecisionBuilder struct {
+ decision decisionsv1alpha1.SchedulingDecisionRequest
+}
+
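+// NewTestDecision creates a builder seeded with sensible defaults. Typical
+// usage, as in the controller tests:
+//
+//	decision := NewTestDecision("decision-1").
+//		WithInput(map[string]float64{"host1": 1.0, "host2": 2.0}).
+//		WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host1": 0.5})).
+//		Build()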
+func NewTestDecision(id string) *TestDecisionBuilder {
+ return &TestDecisionBuilder{
+ decision: decisionsv1alpha1.SchedulingDecisionRequest{
+ ID: id,
+ RequestedAt: metav1.NewTime(time.Now()),
+ EventType: decisionsv1alpha1.SchedulingEventTypeInitialPlacement,
+ Input: map[string]float64{
+ "host1": 1.0,
+ },
+ Pipeline: decisionsv1alpha1.SchedulingDecisionPipelineSpec{
+ Name: "test-pipeline",
+ },
+ Flavor: decisionsv1alpha1.Flavor{
+ Name: "test-flavor",
+ Resources: map[string]resource.Quantity{
+ "cpu": *resource.NewQuantity(int64(DefaultTestVCPUs), resource.DecimalSI),
+ "memory": *resource.NewQuantity(int64(DefaultTestRAM), resource.DecimalSI),
+ "storage": *resource.NewQuantity(int64(DefaultTestDisk), resource.DecimalSI),
+ },
+ },
+ },
+ }
+}
+
+// WithRequestedAt sets the RequestedAt timestamp
+func (b *TestDecisionBuilder) WithRequestedAt(t time.Time) *TestDecisionBuilder {
+ b.decision.RequestedAt = metav1.NewTime(t)
+ return b
+}
+
+// WithInput sets the input hosts and scores
+func (b *TestDecisionBuilder) WithInput(input map[string]float64) *TestDecisionBuilder {
+ b.decision.Input = input
+ return b
+}
+
+// WithPipelineOutputs sets the pipeline outputs
+func (b *TestDecisionBuilder) WithPipelineOutputs(outputs ...decisionsv1alpha1.SchedulingDecisionPipelineOutputSpec) *TestDecisionBuilder {
+ b.decision.Pipeline.Outputs = outputs
+ return b
+}
+
+// WithEventType sets the event type
+func (b *TestDecisionBuilder) WithEventType(eventType decisionsv1alpha1.SchedulingEventType) *TestDecisionBuilder {
+ b.decision.EventType = eventType
+ return b
+}
+
+// Build returns the built SchedulingDecisionRequest
+func (b *TestDecisionBuilder) Build() decisionsv1alpha1.SchedulingDecisionRequest {
+ return b.decision
+}
+
+// TestSchedulingDecisionBuilder helps build SchedulingDecision objects for tests
+type TestSchedulingDecisionBuilder struct {
+ resource decisionsv1alpha1.SchedulingDecision
+}
+
+// NewTestSchedulingDecision creates a new test SchedulingDecision builder
+func NewTestSchedulingDecision(name string) *TestSchedulingDecisionBuilder {
+ return &TestSchedulingDecisionBuilder{
+ resource: decisionsv1alpha1.SchedulingDecision{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: name,
+ },
+ Spec: decisionsv1alpha1.SchedulingDecisionSpec{
+ Decisions: []decisionsv1alpha1.SchedulingDecisionRequest{},
+ },
+ },
+ }
+}
+
+// WithDecisions adds decisions to the SchedulingDecision
+func (b *TestSchedulingDecisionBuilder) WithDecisions(decisions ...decisionsv1alpha1.SchedulingDecisionRequest) *TestSchedulingDecisionBuilder {
+ b.resource.Spec.Decisions = decisions
+ return b
+}
+
+// WithCreationTimestamp sets the creation timestamp
+func (b *TestSchedulingDecisionBuilder) WithCreationTimestamp(t time.Time) *TestSchedulingDecisionBuilder {
+ b.resource.ObjectMeta.CreationTimestamp = metav1.NewTime(t)
+ return b
+}
+
+// WithNamespace sets the namespace
+func (b *TestSchedulingDecisionBuilder) WithNamespace(namespace string) *TestSchedulingDecisionBuilder {
+ b.resource.ObjectMeta.Namespace = namespace
+ return b
+}
+
+// Build returns the built SchedulingDecision
+func (b *TestSchedulingDecisionBuilder) Build() *decisionsv1alpha1.SchedulingDecision {
+ return &b.resource
+}
+
+// NewTestPipelineOutput creates a pipeline output spec for testing
+func NewTestPipelineOutput(step string, activations map[string]float64) decisionsv1alpha1.SchedulingDecisionPipelineOutputSpec {
+ return decisionsv1alpha1.SchedulingDecisionPipelineOutputSpec{
+ Step: step,
+ Activations: activations,
+ }
+}
+
+// SetupTestEnvironment creates a fake client and scheme for testing
+func SetupTestEnvironment(t *testing.T, resources ...client.Object) (client.Client, *runtime.Scheme) {
+ t.Helper()
+
+ scheme := runtime.NewScheme()
+ if err := decisionsv1alpha1.AddToScheme(scheme); err != nil {
+ t.Fatalf("Failed to add scheme: %v", err)
+ }
+
+ clientBuilder := fake.NewClientBuilder().WithScheme(scheme)
+ if len(resources) > 0 {
+ clientBuilder = clientBuilder.WithObjects(resources...)
+ }
+
+ // Add status subresource for SchedulingDecision
+ fakeClient := clientBuilder.WithStatusSubresource(&decisionsv1alpha1.SchedulingDecision{}).Build()
+
+ return fakeClient, scheme
+}
+
+// CreateTestRequest creates a controller request for testing
+func CreateTestRequest(name string, namespace ...string) ctrl.Request {
+ req := ctrl.Request{
+ NamespacedName: client.ObjectKey{
+ Name: name,
+ },
+ }
+ if len(namespace) > 0 {
+ req.NamespacedName.Namespace = namespace[0]
+ }
+ return req
+}
+
+// AssertResourceExists checks that a resource exists and returns it
+func AssertResourceExists(t *testing.T, c client.Client, name string, namespace ...string) *decisionsv1alpha1.SchedulingDecision {
+ t.Helper()
+
+ key := client.ObjectKey{Name: name}
+ if len(namespace) > 0 {
+ key.Namespace = namespace[0]
+ }
+
+ var resource decisionsv1alpha1.SchedulingDecision
+ if err := c.Get(t.Context(), key, &resource); err != nil {
+ t.Fatalf("Resource %s should exist: %v", name, err)
+ }
+ return &resource
+}
+
+// AssertResourceDeleted checks that a resource has been deleted
+func AssertResourceDeleted(t *testing.T, c client.Client, name string, namespace ...string) {
+ t.Helper()
+
+ key := client.ObjectKey{Name: name}
+ if len(namespace) > 0 {
+ key.Namespace = namespace[0]
+ }
+
+ var resource decisionsv1alpha1.SchedulingDecision
+ err := c.Get(t.Context(), key, &resource)
+ if err == nil {
+ t.Errorf("Resource %s should have been deleted", name)
+ }
+}
+
+// AssertResourceState checks the state of a SchedulingDecision
+func AssertResourceState(t *testing.T, resource *decisionsv1alpha1.SchedulingDecision, expectedState decisionsv1alpha1.SchedulingDecisionState) {
+ t.Helper()
+
+ if resource.Status.State != expectedState {
+ t.Errorf("Expected state '%s', got '%s'", expectedState, resource.Status.State)
+ }
+}
+
+// AssertResourceError checks the error message of a SchedulingDecision
+func AssertResourceError(t *testing.T, resource *decisionsv1alpha1.SchedulingDecision, expectedError string) {
+ t.Helper()
+
+ if resource.Status.Error != expectedError {
+ t.Errorf("Expected error '%s', got '%s'", expectedError, resource.Status.Error)
+ }
+}
+
+// AssertNoError checks that there's no error in the resource status
+func AssertNoError(t *testing.T, resource *decisionsv1alpha1.SchedulingDecision) {
+ t.Helper()
+
+ if resource.Status.Error != "" {
+ t.Errorf("Expected no error, got '%s'", resource.Status.Error)
+ }
+}
+
+// AssertResultCount checks the number of results in a SchedulingDecision
+func AssertResultCount(t *testing.T, resource *decisionsv1alpha1.SchedulingDecision, expectedCount int) {
+ t.Helper()
+
+ if len(resource.Status.Results) != expectedCount {
+ t.Errorf("Expected %d results, got %d", expectedCount, len(resource.Status.Results))
+ }
+}
+
+// AssertDecisionCount checks the decision count in a SchedulingDecision
+func AssertDecisionCount(t *testing.T, resource *decisionsv1alpha1.SchedulingDecision, expectedCount int) {
+ t.Helper()
+
+ if resource.Status.DecisionCount != expectedCount {
+ t.Errorf("Expected decision count %d, got %d", expectedCount, resource.Status.DecisionCount)
+ }
+}
+
+// AssertFinalScores checks the final scores in a result
+func AssertFinalScores(t *testing.T, result decisionsv1alpha1.SchedulingDecisionResult, expectedScores map[string]float64) {
+ t.Helper()
+
+ if len(result.FinalScores) != len(expectedScores) {
+ t.Errorf("Expected %d final scores, got %d", len(expectedScores), len(result.FinalScores))
+ }
+
+ for host, expectedScore := range expectedScores {
+ if actualScore, exists := result.FinalScores[host]; !exists {
+ t.Errorf("Expected final score for host '%s', but it was not found", host)
+ } else if actualScore != expectedScore {
+ t.Errorf("Expected final score for host '%s' to be %f, got %f", host, expectedScore, actualScore)
+ }
+ }
+}
+
+// AssertDeletedHosts checks the deleted hosts in a result
+func AssertDeletedHosts(t *testing.T, result decisionsv1alpha1.SchedulingDecisionResult, expectedDeletedHosts map[string][]string) {
+ t.Helper()
+
+ if len(result.DeletedHosts) != len(expectedDeletedHosts) {
+ t.Errorf("Expected %d deleted hosts, got %d", len(expectedDeletedHosts), len(result.DeletedHosts))
+ }
+
+ for host, expectedSteps := range expectedDeletedHosts {
+ if actualSteps, exists := result.DeletedHosts[host]; !exists {
+ t.Errorf("Expected deleted host '%s', but it was not found", host)
+ } else if len(actualSteps) != len(expectedSteps) {
+ t.Errorf("Expected host '%s' to be deleted by %d steps, got %d", host, len(expectedSteps), len(actualSteps))
+ } else {
+ for i, expectedStep := range expectedSteps {
+ if actualSteps[i] != expectedStep {
+ t.Errorf("Expected host '%s' step %d to be '%s', got '%s'", host, i, expectedStep, actualSteps[i])
+ }
+ }
+ }
+ }
+}
+
+// AssertDescriptionContains checks that a description contains expected text
+func AssertDescriptionContains(t *testing.T, description string, expectedContents ...string) {
+ t.Helper()
+
+ for _, expectedContent := range expectedContents {
+ if !strings.Contains(description, expectedContent) {
+ t.Errorf("Expected description to contain '%s', got '%s'", expectedContent, description)
+ }
+ }
+}
+
+// CreateTTLReconciler creates a TTL reconciler with the given TTL duration.
+// If ttl is zero, the reconciler falls back to its internal default.
+func CreateTTLReconciler(fakeClient client.Client, scheme *runtime.Scheme, ttl time.Duration) *SchedulingDecisionTTLController {
+ ttlSeconds := int(ttl.Seconds())
+ return &SchedulingDecisionTTLController{
+ Client: fakeClient,
+ Scheme: scheme,
+ Conf: Config{
+ TTLAfterDecisionSeconds: ttlSeconds,
+ },
+ }
+}
+
+// CreateSchedulingReconciler creates a scheduling decision reconciler.
+// If no Config is passed, a zero-value Config is used.
+func CreateSchedulingReconciler(fakeClient client.Client, conf ...Config) *SchedulingDecisionReconciler {
+ var config Config
+ if len(conf) > 0 {
+ config = conf[0]
+ }
+ return &SchedulingDecisionReconciler{
+ Conf: config,
+ Client: fakeClient,
+ }
+}
diff --git a/decisions/internal/controller/ttl_controller.go b/decisions/internal/controller/ttl_controller.go
new file mode 100644
index 00000000..db5affa2
--- /dev/null
+++ b/decisions/internal/controller/ttl_controller.go
@@ -0,0 +1,165 @@
+// Copyright 2025 SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package controller
+
+import (
+ "context"
+ "time"
+
+ "github.com/go-logr/logr"
+ "k8s.io/apimachinery/pkg/runtime"
+ ctrl "sigs.k8s.io/controller-runtime"
+ "sigs.k8s.io/controller-runtime/pkg/client"
+ "sigs.k8s.io/controller-runtime/pkg/controller"
+ logf "sigs.k8s.io/controller-runtime/pkg/log"
+ "sigs.k8s.io/controller-runtime/pkg/predicate"
+
+ decisionsv1alpha1 "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1"
+)
+
+// TTLStartupReconciler handles startup reconciliation for existing resources
+type TTLStartupReconciler struct {
+ ttlController *SchedulingDecisionTTLController
+}
+
+// Start implements the Runnable interface and runs startup reconciliation
+func (s *TTLStartupReconciler) Start(ctx context.Context) error {
+ log := logf.FromContext(ctx).WithName("ttl-startup-reconciler")
+ log.Info("Starting TTL startup reconciliation for existing resources")
+
+ s.ttlController.reconcileAllResourcesOnStartup(ctx)
+ return nil
+}
+
+// SchedulingDecisionTTLController handles automatic cleanup of resolved SchedulingDecision resources
+// after a configurable TTL period.
+type SchedulingDecisionTTLController struct {
+ // Client for the kubernetes API.
+ client.Client
+ // Kubernetes scheme to use for the decisions.
+ Scheme *runtime.Scheme
+ // Configuration for the TTL controller.
+ Conf Config
+}
+
+// +kubebuilder:rbac:groups=decisions.cortex,resources=schedulingdecisions,verbs=get;list;watch;delete
+// +kubebuilder:rbac:groups=decisions.cortex,resources=schedulingdecisions/status,verbs=get
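+// (The two markers above are picked up by controller-gen to emit the RBAC
+// rules this controller needs: read and delete decisions, read their status.)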
+
+func (r *SchedulingDecisionTTLController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+ log := logf.FromContext(ctx).WithName("ttl-controller")
+
+ // Fetch the decision object
+ var decision decisionsv1alpha1.SchedulingDecision
+ if err := r.Get(ctx, req.NamespacedName, &decision); err != nil {
+ // Resource was deleted or doesn't exist - nothing to clean up
+ return ctrl.Result{}, client.IgnoreNotFound(err)
+ }
+
+ return r.processResourceForTTL(ctx, &decision, log)
+}
+
+func (r *SchedulingDecisionTTLController) getTTL() time.Duration {
+ if r.Conf.TTLAfterDecisionSeconds > 0 {
+ return time.Duration(r.Conf.TTLAfterDecisionSeconds) * time.Second
+ }
+ return time.Duration(DefaultTTLAfterDecisionSeconds) * time.Second
+}
+
+// processResourceForTTL handles the common TTL logic for a single resource
+func (r *SchedulingDecisionTTLController) processResourceForTTL(ctx context.Context, decision *decisionsv1alpha1.SchedulingDecision, log logr.Logger) (ctrl.Result, error) {
+ // Calculate age based on last decision's RequestedAt timestamp
+ var referenceTime time.Time
+ if len(decision.Spec.Decisions) > 0 {
+ // Use the last decision's RequestedAt timestamp
+ lastDecision := decision.Spec.Decisions[len(decision.Spec.Decisions)-1]
+ referenceTime = lastDecision.RequestedAt.Time
+ } else {
+ // Fallback to creation timestamp if no decisions exist
+ referenceTime = decision.CreationTimestamp.Time
+ }
+
+ age := time.Since(referenceTime)
+ ttl := r.getTTL()
+
+ if age >= ttl {
+ // TTL has expired - delete the resource
+ log.Info("Deleting expired SchedulingDecision",
+ "name", decision.Name,
+ "age", age.String(),
+ "ttl", ttl.String())
+
+ if err := r.Delete(ctx, decision); err != nil {
+ if client.IgnoreNotFound(err) != nil {
+ log.Error(err, "Failed to delete expired SchedulingDecision", "name", decision.Name)
+ return ctrl.Result{}, err
+ }
+ log.V(1).Info("SchedulingDecision was already deleted", "name", decision.Name)
+ }
+
+ return ctrl.Result{}, nil
+ }
+
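+ // Not expired yet: requeue exactly when the TTL will lapse, so no
+ // periodic resync of unexpired resources is needed.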
+ remainingTime := ttl - age
+ log.V(1).Info("Scheduling SchedulingDecision for future deletion",
+ "name", decision.Name,
+ "remainingTime", remainingTime.String())
+
+ return ctrl.Result{RequeueAfter: remainingTime}, nil
+}
+
+// reconcileAllResourcesOnStartup processes all existing SchedulingDecision resources
+// to check for expired ones that should be cleaned up after controller restart
+func (r *SchedulingDecisionTTLController) reconcileAllResourcesOnStartup(ctx context.Context) {
+ log := logf.FromContext(ctx).WithName("ttl-startup-reconciler")
+
+ var resources decisionsv1alpha1.SchedulingDecisionList
+ if err := r.List(ctx, &resources); err != nil {
+ log.Error(err, "Failed to list SchedulingDecision resources during startup reconciliation")
+ return
+ }
+
+ log.Info("Processing existing resources for TTL cleanup", "resourceCount", len(resources.Items))
+
+ processedCount := 0
+ expiredCount := 0
+
+ for _, resource := range resources.Items {
+ // Use the shared TTL processing logic
+ result, err := r.processResourceForTTL(ctx, &resource, log)
+ if err != nil {
+ log.Error(err, "Failed to process resource during startup reconciliation", "name", resource.Name)
+ } else if result.RequeueAfter == 0 {
+ // Resource was deleted (no requeue means it was expired and deleted)
+ expiredCount++
+ }
+ processedCount++
+ }
+
+ log.Info("Startup TTL reconciliation completed",
+ "processedResources", processedCount,
+ "expiredResources", expiredCount)
+}
+
+func (r *SchedulingDecisionTTLController) SetupWithManager(mgr ctrl.Manager) error {
+ log := mgr.GetLogger().WithName("ttl-controller")
+
+ log.Info("TTL Controller configured", "ttlAfterDecisionSeconds", r.getTTL().String())
+
+ // Add the startup reconciler as a runnable
+ if err := mgr.Add(&TTLStartupReconciler{ttlController: r}); err != nil {
+ return err
+ }
+
+ return ctrl.NewControllerManagedBy(mgr).
+ For(&decisionsv1alpha1.SchedulingDecision{}).
+ Named("schedulingdecision-ttl").
+ WithOptions(controller.Options{
+ MaxConcurrentReconciles: 10,
+ }).
+ WithEventFilter(
+ // Watch for spec changes (when decisions are added/modified)
+ predicate.GenerationChangedPredicate{},
+ ).
+ Complete(r)
+}
diff --git a/decisions/internal/controller/ttl_controller_test.go b/decisions/internal/controller/ttl_controller_test.go
new file mode 100644
index 00000000..f4945625
--- /dev/null
+++ b/decisions/internal/controller/ttl_controller_test.go
@@ -0,0 +1,212 @@
+// Copyright 2025 SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package controller
+
+import (
+ "context"
+ "testing"
+ "time"
+)
+
+func TestTTLController(t *testing.T) {
+ tests := []struct {
+ name string
+ resourceAge time.Duration
+ ttl time.Duration
+ expectDeleted bool
+ expectRequeue bool
+ }{
+ {
+ name: "young resource preserved",
+ resourceAge: DefaultTestAge,
+ ttl: DefaultTestTTL,
+ expectDeleted: false,
+ expectRequeue: true,
+ },
+ {
+ name: "old resource deleted",
+ resourceAge: OldTestAge,
+ ttl: DefaultTestTTL,
+ expectDeleted: true,
+ expectRequeue: false,
+ },
+ {
+ name: "resource at TTL boundary deleted",
+ resourceAge: DefaultTestTTL,
+ ttl: DefaultTestTTL,
+ expectDeleted: true,
+ expectRequeue: false,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ // Create test resource with specified age
+ decision := NewTestDecision("decision-1").
+ WithRequestedAt(time.Now().Add(-tt.resourceAge)).
+ Build()
+
+ resource := NewTestSchedulingDecision("test-decision").
+ WithDecisions(decision).
+ Build()
+
+ fakeClient, scheme := SetupTestEnvironment(t, resource)
+ reconciler := CreateTTLReconciler(fakeClient, scheme, tt.ttl)
+ req := CreateTestRequest("test-decision")
+
+ result, err := reconciler.Reconcile(context.Background(), req)
+ if err != nil {
+ t.Fatalf("Reconcile failed: %v", err)
+ }
+
+ // Check deletion expectation
+ if tt.expectDeleted {
+ AssertResourceDeleted(t, fakeClient, "test-decision")
+ } else {
+ AssertResourceExists(t, fakeClient, "test-decision")
+ }
+
+ // Check requeue expectation
+ if tt.expectRequeue && result.RequeueAfter == 0 {
+ t.Error("Expected requeue but got none")
+ }
+ if !tt.expectRequeue && result.RequeueAfter != 0 {
+ t.Error("Expected no requeue but got one")
+ }
+ })
+ }
+}
+
+func TestTTLControllerFallbackToCreationTimestamp(t *testing.T) {
+ // Resource with no decisions should use creation timestamp
+ resource := NewTestSchedulingDecision("empty-decision").
+ WithCreationTimestamp(time.Now().Add(-OldTestAge)).
+ Build()
+
+ fakeClient, scheme := SetupTestEnvironment(t, resource)
+ reconciler := CreateTTLReconciler(fakeClient, scheme, DefaultTestTTL)
+ req := CreateTestRequest("empty-decision")
+
+ result, err := reconciler.Reconcile(context.Background(), req)
+ if err != nil {
+ t.Fatalf("Reconcile failed: %v", err)
+ }
+
+ // Should be deleted and not requeued
+ AssertResourceDeleted(t, fakeClient, "empty-decision")
+ if result.RequeueAfter != 0 {
+ t.Error("Expected no requeue after deletion")
+ }
+}
+
+func TestTTLControllerDefaultTTL(t *testing.T) {
+ decision := NewTestDecision("decision-1").
+ WithRequestedAt(time.Now().Add(-DefaultTestAge)).
+ Build()
+
+ resource := NewTestSchedulingDecision("default-ttl-decision").
+ WithDecisions(decision).
+ Build()
+
+ fakeClient, scheme := SetupTestEnvironment(t, resource)
+
+ // Create reconciler without TTL config (should use default)
+ reconciler := CreateTTLReconciler(fakeClient, scheme, 0) // Zero duration means use default
+
+ req := CreateTestRequest("default-ttl-decision")
+ result, err := reconciler.Reconcile(context.Background(), req)
+ if err != nil {
+ t.Fatalf("Reconcile failed: %v", err)
+ }
+
+ // 1-hour-old resource with default TTL should be preserved
+ AssertResourceExists(t, fakeClient, "default-ttl-decision")
+ if result.RequeueAfter == 0 {
+ t.Error("Expected requeue for resource with default TTL")
+ }
+
+ // Verify requeue time is reasonable
+ expectedRequeue := time.Duration(DefaultTTLAfterDecisionSeconds)*time.Second - DefaultTestAge
+ if result.RequeueAfter < expectedRequeue-TestTolerance || result.RequeueAfter > expectedRequeue+TestTolerance {
+ t.Errorf("Requeue time %v not within expected range %v ± %v",
+ result.RequeueAfter, expectedRequeue, TestTolerance)
+ }
+}
+
+func TestTTLControllerNonExistentResource(t *testing.T) {
+ fakeClient, scheme := SetupTestEnvironment(t)
+ reconciler := CreateTTLReconciler(fakeClient, scheme, DefaultTestTTL)
+ req := CreateTestRequest("non-existent")
+
+ result, err := reconciler.Reconcile(context.Background(), req)
+ if err != nil {
+ t.Fatalf("Should handle non-existent resources gracefully: %v", err)
+ }
+
+ if result.RequeueAfter != 0 {
+ t.Error("Expected no requeue for non-existent resource")
+ }
+}
+
+func TestTTLStartupReconciliation(t *testing.T) {
+ // Create resources with different ages
+ expiredDecision := NewTestDecision("expired-decision").
+ WithRequestedAt(time.Now().Add(-OldTestAge)).
+ Build()
+
+ youngDecision := NewTestDecision("young-decision").
+ WithRequestedAt(time.Now().Add(-DefaultTestAge)).
+ Build()
+
+ expiredResource := NewTestSchedulingDecision("expired-resource").
+ WithDecisions(expiredDecision).
+ Build()
+
+ youngResource := NewTestSchedulingDecision("young-resource").
+ WithDecisions(youngDecision).
+ Build()
+
+ fakeClient, scheme := SetupTestEnvironment(t, expiredResource, youngResource)
+ reconciler := CreateTTLReconciler(fakeClient, scheme, DefaultTestTTL)
+
+ // Run startup reconciliation
+ reconciler.reconcileAllResourcesOnStartup(context.Background())
+
+ // Verify expired resource was deleted
+ AssertResourceDeleted(t, fakeClient, "expired-resource")
+
+ // Verify young resource still exists
+ AssertResourceExists(t, fakeClient, "young-resource")
+}
+
+func TestTTLStartupReconcilerRunnable(t *testing.T) {
+ fakeClient, scheme := SetupTestEnvironment(t)
+ reconciler := CreateTTLReconciler(fakeClient, scheme, DefaultTestTTL)
+
+ // Create the startup reconciler
+ startupReconciler := &TTLStartupReconciler{ttlController: reconciler}
+
+ // Test the Start method
+ err := startupReconciler.Start(context.Background())
+ if err != nil {
+ t.Fatalf("TTLStartupReconciler.Start() should not return error: %v", err)
+ }
+
+ // The method should complete without error (no resources to process)
+ t.Log("TTLStartupReconciler.Start() completed successfully")
+}
+
+func TestTTLStartupReconciliationErrorHandling(t *testing.T) {
+ // This test verifies that startup reconciliation handles errors gracefully
+ // We can't easily simulate List() failures with the fake client, but we can
+ // test that the method doesn't panic and handles empty results properly
+
+ fakeClient, scheme := SetupTestEnvironment(t) // No resources
+ reconciler := CreateTTLReconciler(fakeClient, scheme, DefaultTestTTL)
+
+ // This should complete without error even with no resources
+ reconciler.reconcileAllResourcesOnStartup(context.Background())
+
+ t.Log("Startup reconciliation handled empty resource list gracefully")
+}
diff --git a/go.mod b/go.mod
index f8096cb7..5284cafc 100644
--- a/go.mod
+++ b/go.mod
@@ -4,11 +4,13 @@ go 1.25.0
replace (
github.com/cobaltcore-dev/cortex/commands => ./commands
+ github.com/cobaltcore-dev/cortex/decisions/api => ./decisions/api
github.com/cobaltcore-dev/cortex/reservations/api => ./reservations/api
github.com/cobaltcore-dev/cortex/testlib => ./testlib
)
require (
+ github.com/cobaltcore-dev/cortex/decisions/api v0.0.0-00010101000000-000000000000
github.com/cobaltcore-dev/cortex/reservations/api v0.0.0-00010101000000-000000000000
github.com/dlmiddlecote/sqlstats v1.0.2
github.com/eclipse/paho.mqtt.golang v1.5.1
diff --git a/helm/library/cortex-core/templates/rbac.yaml b/helm/library/cortex-core/templates/rbac.yaml
index 57903041..baca03ab 100644
--- a/helm/library/cortex-core/templates/rbac.yaml
+++ b/helm/library/cortex-core/templates/rbac.yaml
@@ -1,7 +1,7 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
- name: {{ .Release.Namespace }}-{{ include "cortex.fullname" . }}
+ name: {{ .Release.Namespace }}-{{ include "cortex.fullname" . }}-computereservation
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
@@ -12,6 +12,20 @@ subjects:
name: {{ .Release.Namespace }}-{{ include "cortex.fullname" . }}
namespace: {{ .Release.Namespace }}
---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+ name: {{ .Release.Namespace }}-{{ include "cortex.fullname" . }}-schedulingdecision
+roleRef:
+ apiGroup: rbac.authorization.k8s.io
+ kind: ClusterRole
+ # From the decisions operator.
+ name: schedulingdecision-editor-role
+subjects:
+- kind: ServiceAccount
+ name: {{ .Release.Namespace }}-{{ include "cortex.fullname" . }}
+ namespace: {{ .Release.Namespace }}
+---
apiVersion: v1
kind: ServiceAccount
metadata:
diff --git a/internal/scheduler/cinder/api/http/api_test.go b/internal/scheduler/cinder/api/http/api_test.go
index dcf1b251..2c1e8cb2 100644
--- a/internal/scheduler/cinder/api/http/api_test.go
+++ b/internal/scheduler/cinder/api/http/api_test.go
@@ -21,6 +21,18 @@ type mockPipeline struct {
runFunc func(api.ExternalSchedulerRequest) ([]string, error)
}
+func (m *mockPipeline) SetConsumer(consumer scheduler.SchedulingDecisionConsumer[api.ExternalSchedulerRequest]) {
+ // Do nothing
+}
+
+func (m *mockPipeline) Consume(
+ request api.ExternalSchedulerRequest,
+ applicationOrder []string,
+ inWeights map[string]float64,
+ stepWeights map[string]map[string]float64,
+) {
+ // Do nothing
+}
+
func (m *mockPipeline) Run(req api.ExternalSchedulerRequest) ([]string, error) {
return m.runFunc(req)
}
diff --git a/internal/scheduler/manila/api/http/api_test.go b/internal/scheduler/manila/api/http/api_test.go
index 6a839216..2c85c6b2 100644
--- a/internal/scheduler/manila/api/http/api_test.go
+++ b/internal/scheduler/manila/api/http/api_test.go
@@ -21,6 +21,18 @@ type mockPipeline struct {
runFunc func(api.ExternalSchedulerRequest) ([]string, error)
}
+func (m *mockPipeline) SetConsumer(consumer scheduler.SchedulingDecisionConsumer[api.ExternalSchedulerRequest]) {
+ // Do nothing
+}
+
+func (m *mockPipeline) Consume(
+ request api.ExternalSchedulerRequest,
+ applicationOrder []string,
+ inWeights map[string]float64,
+ stepWeights map[string]map[string]float64,
+) {
+ // Do nothing
+}
+
func (m *mockPipeline) Run(req api.ExternalSchedulerRequest) ([]string, error) {
return m.runFunc(req)
}
diff --git a/internal/scheduler/nova/api/http/api.go b/internal/scheduler/nova/api/http/api.go
index 14b5b027..14986bdb 100644
--- a/internal/scheduler/nova/api/http/api.go
+++ b/internal/scheduler/nova/api/http/api.go
@@ -15,6 +15,7 @@ import (
"strings"
"time"
+ "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1"
"github.com/cobaltcore-dev/cortex/internal/conf"
"github.com/cobaltcore-dev/cortex/internal/db"
"github.com/cobaltcore-dev/cortex/internal/monitoring"
@@ -26,6 +27,9 @@ import (
"github.com/majewsky/gg/option"
"github.com/sapcc/go-api-declarations/liquid"
"github.com/sapcc/go-bits/jobloop"
+ "sigs.k8s.io/controller-runtime/pkg/client"
+
+ ctrl "sigs.k8s.io/controller-runtime"
)
type HTTPAPI interface {
@@ -40,6 +44,9 @@ type httpAPI struct {
// Database connection to load specific objects during the scheduling process.
DB db.DB
+
+ // Kubernetes client
+ Client client.Client
}
func NewAPI(config conf.SchedulerConfig, registry *monitoring.Registry, db db.DB, mqttClient mqtt.Client) HTTPAPI {
@@ -53,11 +60,26 @@ func NewAPI(config conf.SchedulerConfig, registry *monitoring.Registry, db db.DB
pipelineConf, db, monitor.SubPipeline("nova-"+pipelineConf.Name), mqttClient,
)
}
+
+ scheme, err := v1alpha1.SchemeBuilder.Build()
+ if err != nil {
+ panic(err)
+ }
+ clientConfig, err := ctrl.GetConfig()
+ if err != nil {
+ panic(err)
+ }
+ cl, err := client.New(clientConfig, client.Options{Scheme: scheme})
+ if err != nil {
+ panic(err)
+ }
+
return &httpAPI{
pipelines: pipelines,
config: config,
monitor: scheduler.NewSchedulerMonitor(registry),
DB: db,
+ Client: cl, // TODO: inject this client instead of constructing it in NewAPI.
}
}
@@ -69,6 +91,7 @@ func (httpAPI *httpAPI) Init(mux *http.ServeMux) {
}
mux.HandleFunc("/scheduler/nova/external", httpAPI.NovaExternalScheduler)
mux.HandleFunc("/scheduler/nova/commitments/change", httpAPI.HandleCommitmentChangeRequest)
+ mux.HandleFunc("/scheduler/nova/scheduling-decisions", httpAPI.HandleListSchedulingDecisions)
}
// Check if the scheduler can run based on the request data.
@@ -408,3 +431,64 @@ func (httpAPI *httpAPI) HandleCommitmentChangeRequest(w http.ResponseWriter, r *
}
callback.Respond(http.StatusOK, nil, "")
}
+
+// List all scheduling decisions.
+func (httpAPI *httpAPI) HandleListSchedulingDecisions(w http.ResponseWriter, r *http.Request) {
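+ // CORS headers so a browser frontend (here the local dev server on
+ // port 4000) can call this endpoint directly.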
+ w.Header().Set("Access-Control-Allow-Origin", "http://localhost:4000")
+ w.Header().Set("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
+ w.Header().Set("Access-Control-Allow-Headers", "Content-Type")
+
+ // Handle preflight OPTIONS request
+ if r.Method == http.MethodOptions {
+ w.WriteHeader(http.StatusOK)
+ return
+ }
+
+ callback := httpAPI.monitor.Callback(w, r, "/scheduler/nova/scheduling-decisions")
+
+ // Exit early if the request method is not GET.
+ if r.Method != http.MethodGet {
+ internalErr := fmt.Errorf("invalid request method: %s", r.Method)
+ callback.Respond(http.StatusMethodNotAllowed, internalErr, "invalid request method")
+ return
+ }
+
+ // Check if a specific vm id is requested.
+ vmID := r.URL.Query().Get("vm_id")
+
+ // If no specific vm id is requested, list all scheduling decisions.
+ if vmID == "" {
+ var decisions v1alpha1.SchedulingDecisionList
+ if err := httpAPI.Client.List(r.Context(), &decisions); err != nil {
+ callback.Respond(http.StatusInternalServerError, err, "failed to list scheduling decisions")
+ return
+ }
+ w.Header().Set("Content-Type", "application/json")
+
+ if err := json.NewEncoder(w).Encode(decisions); err != nil {
+ callback.Respond(http.StatusInternalServerError, err, "failed to encode response")
+ return
+ }
+ return
+ }
+
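+ // Decisions are named after the instance UUID, so the vm_id doubles as
+ // the object key for a direct Get.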
+ var decision v1alpha1.SchedulingDecision
+ nn := client.ObjectKey{Name: vmID}
+ if err := httpAPI.Client.Get(r.Context(), nn, &decision); err != nil {
+ if client.IgnoreNotFound(err) != nil {
+ callback.Respond(http.StatusInternalServerError, err, "failed to get scheduling decision")
+ return
+ }
+ // Not found
+ callback.Respond(http.StatusNotFound, err, "scheduling decision not found")
+ return
+ }
+
+ w.Header().Set("Content-Type", "application/json")
+
+ if err := json.NewEncoder(w).Encode(decision); err != nil {
+ callback.Respond(http.StatusInternalServerError, err, "failed to encode response")
+ return
+ }
+ callback.Respond(http.StatusOK, nil, "Success")
+}
diff --git a/internal/scheduler/nova/api/http/api_test.go b/internal/scheduler/nova/api/http/api_test.go
index 29c69688..5f0f879b 100644
--- a/internal/scheduler/nova/api/http/api_test.go
+++ b/internal/scheduler/nova/api/http/api_test.go
@@ -30,6 +30,19 @@ func (m *mockExternalSchedulerPipeline) Run(request api.ExternalSchedulerRequest
return []string{"host1"}, nil
}
+func (m *mockExternalSchedulerPipeline) SetConsumer(consumer scheduler.SchedulingDecisionConsumer[api.ExternalSchedulerRequest]) {
+ // Do nothing
+}
+
+func (m *mockExternalSchedulerPipeline) Consume(
+ request api.ExternalSchedulerRequest,
+ applicationOrder []string,
+ inWeights map[string]float64,
+ stepWeights map[string]map[string]float64,
+) {
+ // Do nothing
+}
+
func TestCanRunScheduler(t *testing.T) {
httpAPI := &httpAPI{
pipelines: map[string]scheduler.Pipeline[api.ExternalSchedulerRequest]{
@@ -255,6 +268,18 @@ type mockCommitmentsPipeline struct {
shouldError bool
}
+func (p *mockCommitmentsPipeline) SetConsumer(consumer scheduler.SchedulingDecisionConsumer[api.ExternalSchedulerRequest]) {
+ // Do nothing
+}
+
+func (p *mockCommitmentsPipeline) Consume(
+ request api.ExternalSchedulerRequest,
+ applicationOrder []string,
+ inWeights map[string]float64,
+ stepWeights map[string]map[string]float64,
+) {
+ // Do nothing
+}
+
func (p *mockCommitmentsPipeline) Run(request api.ExternalSchedulerRequest) ([]string, error) {
if p.shouldError {
return nil, errors.New("mock error")
diff --git a/internal/scheduler/nova/pipeline.go b/internal/scheduler/nova/pipeline.go
index b9f1f2d5..ce3b1b2c 100644
--- a/internal/scheduler/nova/pipeline.go
+++ b/internal/scheduler/nova/pipeline.go
@@ -4,9 +4,12 @@
package nova
import (
+ "context"
"errors"
"log/slog"
+ "math"
+ "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1"
"github.com/cobaltcore-dev/cortex/internal/conf"
"github.com/cobaltcore-dev/cortex/internal/db"
"github.com/cobaltcore-dev/cortex/internal/mqtt"
@@ -16,6 +19,10 @@ import (
"github.com/cobaltcore-dev/cortex/internal/scheduler/nova/plugins/shared"
"github.com/cobaltcore-dev/cortex/internal/scheduler/nova/plugins/vmware"
"github.com/cobaltcore-dev/cortex/internal/sync/openstack/nova"
+ "k8s.io/apimachinery/pkg/api/resource"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ ctrl "sigs.k8s.io/controller-runtime"
+ "sigs.k8s.io/controller-runtime/pkg/client"
)
type NovaStep = scheduler.Step[api.ExternalSchedulerRequest]
@@ -59,6 +66,149 @@ type novaPipeline struct {
preselectAllHosts bool
}
+type novaPipelineConsumer struct {
+ // Kubernetes client to create decision resources.
+ Client client.Client
+}
+
+func NewNovaPipelineConsumer() *novaPipelineConsumer {
+ var kubernetesClient client.Client
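+ // Best-effort construction: if no kubeconfig or in-cluster config is
+ // available, the client stays nil and Consume becomes a no-op.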
+ if scheme, err := v1alpha1.SchemeBuilder.Build(); err == nil {
+ if clientConfig, err := ctrl.GetConfig(); err == nil {
+ if cl, err := client.New(clientConfig, client.Options{Scheme: scheme}); err == nil {
+ // Successfully created a client, use it.
+ kubernetesClient = cl
+ }
+ }
+ }
+ return &novaPipelineConsumer{
+ Client: kubernetesClient,
+ }
+}
+
+func (c *novaPipelineConsumer) Consume(
+ request api.ExternalSchedulerRequest,
+ applicationOrder []string,
+ inWeights map[string]float64,
+ stepWeights map[string]map[string]float64,
+) {
+
+ if c.Client == nil {
+ return
+ }
+
+ // Determine the event type based on request flags
+ var eventType v1alpha1.SchedulingEventType
+ switch {
+ case request.Live:
+ eventType = v1alpha1.SchedulingEventTypeLiveMigration
+ case request.Resize:
+ eventType = v1alpha1.SchedulingEventTypeResize
+ default:
+ eventType = v1alpha1.SchedulingEventTypeInitialPlacement
+ }
+
+ outputs := []v1alpha1.SchedulingDecisionPipelineOutputSpec{}
+ for _, stepKey := range applicationOrder {
+ weights, ok := stepWeights[stepKey]
+ if !ok {
+ // This is ok, since steps can be skipped.
+ continue
+ }
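+ // math.Tanh squashes each unbounded step weight into (-1, 1) before it
+ // is persisted as an activation.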
+ activations := make(map[string]float64, len(weights))
+ for k, v := range weights {
+ activations[k] = math.Tanh(v)
+ }
+ outputs = append(outputs, v1alpha1.SchedulingDecisionPipelineOutputSpec{
+ Step: stepKey,
+ Activations: activations,
+ })
+ }
+
+ // Initialize default values for resource calculation
+ var vcpus, ram, disk int
+ var flavorName string
+ var resources map[string]resource.Quantity
+
+ if request.Spec.Data.Flavor.Data.Name == "" {
+ slog.Warn("scheduler: Flavor data is missing, using zero values for resources", "instanceUUID", request.Spec.Data.InstanceUUID)
+ // Use zero values for resources
+ resources = map[string]resource.Quantity{
+ "cpu": *resource.NewQuantity(0, resource.DecimalSI),
+ "memory": *resource.NewQuantity(0, resource.DecimalSI),
+ "storage": *resource.NewQuantity(0, resource.DecimalSI),
+ }
+ flavorName = "unknown"
+ } else {
+ flavor := request.Spec.Data.Flavor
+ flavorName = flavor.Data.Name
+
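+ // Clamp to the int range to guard the float64-to-int conversion; the
+ // quantities keep the flavor's native units (vCPU count, RAM in MB,
+ // root disk in GB).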
+ vcpus = int(math.Min(float64(flavor.Data.VCPUs), math.MaxInt))
+ ram = int(math.Min(float64(flavor.Data.MemoryMB), math.MaxInt))
+ disk = int(math.Min(float64(flavor.Data.RootGB), math.MaxInt))
+
+ resources = map[string]resource.Quantity{
+ "cpu": *resource.NewQuantity(int64(vcpus), resource.DecimalSI),
+ "memory": *resource.NewQuantity(int64(ram), resource.DecimalSI),
+ "storage": *resource.NewQuantity(int64(disk), resource.DecimalSI),
+ }
+ }
+
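+ // Encode the hypervisor family as 0/1 pseudo-resources so decisions can
+ // be told apart by target hypervisor.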
+ if request.VMware {
+ resources["hypervisor.vmware"] = *resource.NewQuantity(1, resource.DecimalSI)
+ resources["hypervisor.kvm"] = *resource.NewQuantity(0, resource.DecimalSI)
+ } else {
+ resources["hypervisor.vmware"] = *resource.NewQuantity(0, resource.DecimalSI)
+ resources["hypervisor.kvm"] = *resource.NewQuantity(1, resource.DecimalSI)
+ }
+
+ decisionRequest := v1alpha1.SchedulingDecisionRequest{
+ ID: request.Context.RequestID,
+ RequestedAt: metav1.Now(),
+ EventType: eventType,
+ Input: inWeights,
+ Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{
+ Name: request.GetPipeline(),
+ Outputs: outputs,
+ },
+ AvailabilityZone: request.Spec.Data.AvailabilityZone,
+ Flavor: v1alpha1.Flavor{
+ Name: flavorName,
+ Resources: resources,
+ },
+ }
+
+ objectKey := client.ObjectKey{Name: request.Spec.Data.InstanceUUID}
+
+ // Try to update existing decision first
+ var existing v1alpha1.SchedulingDecision
+ if err := c.Client.Get(context.Background(), objectKey, &existing); err == nil {
+ // Decision already exists, append the new decision to the existing ones
+ existing.Spec.Decisions = append(existing.Spec.Decisions, decisionRequest)
+
+ if err := c.Client.Update(context.Background(), &existing); err != nil {
+ slog.Error("scheduler: failed to update existing decision", "error", err, "resourceID", request.Spec.Data.InstanceUUID)
+ return
+ }
+ slog.Info("scheduler: appended decision to existing resource", "resourceID", request.Spec.Data.InstanceUUID, "eventType", eventType)
+ return
+ }
+
+ // Decision doesn't exist, create a new one
+ decision := &v1alpha1.SchedulingDecision{
+ ObjectMeta: ctrl.ObjectMeta{Name: request.Spec.Data.InstanceUUID},
+ Spec: v1alpha1.SchedulingDecisionSpec{
+ Decisions: []v1alpha1.SchedulingDecisionRequest{decisionRequest},
+ },
+ // Status will be filled in by the controller.
+ }
+ if err := c.Client.Create(context.Background(), decision); err != nil {
+ slog.Error("scheduler: failed to create decision", "error", err, "resourceID", request.Spec.Data.InstanceUUID)
+ return
+ }
+ slog.Info("scheduler: created new decision", "resourceID", request.Spec.Data.InstanceUUID, "eventType", eventType)
+}
+
// Create a new Nova scheduler pipeline.
func NewPipeline(
config conf.NovaSchedulerPipelineConfig,
@@ -89,7 +239,9 @@ func NewPipeline(
supportedSteps, config.Plugins, wrappers,
db, monitor, mqttClient, TopicFinished,
)
- return &novaPipeline{pipeline, db, config.PreselectAllHosts}
+ wrapped := &novaPipeline{pipeline, db, config.PreselectAllHosts}
+ wrapped.SetConsumer(NewNovaPipelineConsumer())
+ return wrapped
}
// If needed, modify the request before sending it off to the pipeline.
diff --git a/internal/scheduler/nova/pipeline_test.go b/internal/scheduler/nova/pipeline_test.go
index cbd6e5a0..50aa152d 100644
--- a/internal/scheduler/nova/pipeline_test.go
+++ b/internal/scheduler/nova/pipeline_test.go
@@ -330,3 +330,27 @@ func TestPremodifier_ModifyRequest_PreservesOtherFields(t *testing.T) {
t.Error("original host weight should have been replaced")
}
}
+
+// Test that the consumer handles missing flavor data correctly
+func TestConsumerMissingFlavorData(t *testing.T) {
+ consumer := &novaPipelineConsumer{Client: nil}
+
+ request := api.ExternalSchedulerRequest{
+ Context: api.NovaRequestContext{
+ RequestID: "test-request-id",
+ },
+ Spec: api.NovaObject[api.NovaSpec]{
+ Data: api.NovaSpec{
+ InstanceUUID: "test-uuid",
+ Flavor: api.NovaObject[api.NovaFlavor]{
+ Data: api.NovaFlavor{
+ Name: "", // Empty flavor name triggers missing data handling
+ },
+ },
+ },
+ },
+ }
+
+ // Should handle missing flavor data without panic and use fallback values
+ consumer.Consume(request, []string{}, map[string]float64{}, map[string]map[string]float64{})
+}
diff --git a/internal/scheduler/pipeline.go b/internal/scheduler/pipeline.go
index 1b0ac0cd..24b97b9e 100644
--- a/internal/scheduler/pipeline.go
+++ b/internal/scheduler/pipeline.go
@@ -21,6 +21,9 @@ import (
type Pipeline[RequestType PipelineRequest] interface {
// Run the scheduling pipeline with the given request.
Run(request RequestType) ([]string, error)
+
+ // Set the consumer that will receive the decisions.
+ SetConsumer(consumer SchedulingDecisionConsumer[RequestType])
}
type Premodifier[RequestType PipelineRequest] interface {
@@ -43,6 +46,13 @@ type pipeline[RequestType PipelineRequest] struct {
mqttClient mqtt.Client
// MQTT topic to publish telemetry data on when the pipeline is finished.
mqttTopic string
+
+ // Optional consumer to listen for the decisions.
+ Consumer SchedulingDecisionConsumer[RequestType]
+}
+
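+// SetConsumer attaches an optional consumer; until one is set, the pipeline
+// publishes no decisions.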
+func (p *pipeline[RequestType]) SetConsumer(consumer SchedulingDecisionConsumer[RequestType]) {
+ p.Consumer = consumer
}
type StepWrapper[RequestType PipelineRequest] func(Step[RequestType], conf.SchedulerStepConfig) Step[RequestType]
@@ -193,6 +203,15 @@ type TelemetryMessage[RequestType PipelineRequest] struct {
Out map[string]float64 `json:"out"`
}
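+// SchedulingDecisionConsumer receives the outcome of each pipeline run: the
+// order in which steps were applied, the input weights, and each step's
+// output weights keyed by step name and subject.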
+type SchedulingDecisionConsumer[RequestType PipelineRequest] interface {
+ Consume(
+ request RequestType,
+ applicationOrder []string,
+ inWeights map[string]float64,
+ stepWeights map[string]map[string]float64,
+ )
+}
+
// Evaluate the pipeline and return a list of subjects in order of preference.
func (p *pipeline[RequestType]) Run(request RequestType) ([]string, error) {
slogArgs := request.GetTraceLogArgs()
@@ -232,5 +251,9 @@ func (p *pipeline[RequestType]) Run(request RequestType) ([]string, error) {
Out: outWeights,
})
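+ // Hand the decision off asynchronously so persisting it never blocks
+ // the scheduling hot path.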
+ if p.Consumer != nil {
+ go p.Consumer.Consume(request, p.applicationOrder, inWeights, stepWeights)
+ }
+
return subjects, nil
}
diff --git a/internal/scheduler/pipeline_test.go b/internal/scheduler/pipeline_test.go
index b84a0c4c..734b8806 100644
--- a/internal/scheduler/pipeline_test.go
+++ b/internal/scheduler/pipeline_test.go
@@ -11,6 +11,7 @@ import (
"github.com/cobaltcore-dev/cortex/internal/conf"
"github.com/cobaltcore-dev/cortex/internal/db"
"github.com/cobaltcore-dev/cortex/testlib/mqtt"
+ "k8s.io/client-go/rest"
)
type mockPipelineStep struct {
@@ -227,6 +228,11 @@ func TestNewPipeline(t *testing.T) {
database := db.DB{} // Mock or initialize as needed
monitor := PipelineMonitor{} // Replace with an actual mock implementation if available
mqttClient := &mqtt.MockClient{}
+
+ // Stub kubeconfig for environments without a cluster; currently unused
+ // by the pipeline under test.
+ restConfig := &rest.Config{}
+ _ = restConfig
+
supportedSteps := map[string]func() Step[mockPipelineRequest]{
"mock_pipeline_step": func() Step[mockPipelineRequest] {
return &mockPipelineStep{
diff --git a/visualizer/Dockerfile b/visualizer/Dockerfile
index 5ab7a8ee..c83db471 100644
--- a/visualizer/Dockerfile
+++ b/visualizer/Dockerfile
@@ -6,3 +6,4 @@ COPY vendor/mqtt.min.js /usr/share/nginx/html/mqtt.min.js
COPY nova.html /usr/share/nginx/html/nova.html
COPY manila.html /usr/share/nginx/html/manila.html
COPY shared.css /usr/share/nginx/html/shared.css
+COPY favicon.ico /usr/share/nginx/html/favicon.ico
diff --git a/visualizer/favicon.ico b/visualizer/favicon.ico
new file mode 100644
index 00000000..b4f9d5fb
Binary files /dev/null and b/visualizer/favicon.ico differ
diff --git a/visualizer/manila.html b/visualizer/manila.html
index 91eed41f..26786547 100644
--- a/visualizer/manila.html
+++ b/visualizer/manila.html
@@ -3,194 +3,198 @@
 [hunk garbled during extraction: the manila.html markup was restructured and
 reindented; of the removed/added lines only the page title "Cortex Manila
 Visualizer" and the "Waiting for mqtt data to arrive..." status placeholder
 are recoverable — the HTML tags themselves were stripped]