diff --git a/.github/workflows/push-charts.yaml b/.github/workflows/push-charts.yaml index 7c3a14b1..70cbd76c 100644 --- a/.github/workflows/push-charts.yaml +++ b/.github/workflows/push-charts.yaml @@ -80,3 +80,21 @@ jobs: CHART_PACKAGE=$(ls $CHART_DIR/*.tgz) helm push $CHART_PACKAGE oci://${{ env.REGISTRY }}/${{ github.repository }}/charts/ done + - name: Get all changed decisions Chart.yaml files + id: changed-chart-yaml-files-decisions + uses: tj-actions/changed-files@v47 + with: + files: | + decisions/dist/chart/Chart.yaml + - name: Push decisions charts to registry + if: steps.changed-chart-yaml-files-decisions.outputs.all_changed_files != '' + shell: bash + env: + ALL_CHANGED_FILES: ${{ steps.changed-chart-yaml-files-decisions.outputs.all_changed_files }} + run: | + for CHART_FILE in ${ALL_CHANGED_FILES}; do + CHART_DIR=$(dirname $CHART_FILE) + helm package $CHART_DIR --dependency-update --destination $CHART_DIR + CHART_PACKAGE=$(ls $CHART_DIR/*.tgz) + helm push $CHART_PACKAGE oci://${{ env.REGISTRY }}/${{ github.repository }}/charts/ + done diff --git a/.github/workflows/push-images.yaml b/.github/workflows/push-images.yaml index 43abfbe7..cd9437ba 100644 --- a/.github/workflows/push-images.yaml +++ b/.github/workflows/push-images.yaml @@ -139,3 +139,44 @@ jobs: subject-name: ${{ env.REGISTRY }}/${{ github.repository }}-reservations-operator subject-digest: ${{ steps.push_cortex_reservations.outputs.digest }} push-to-registry: true + # Only build and push the decisions operator image if there are changes + # in the decisions directory. 
+ - name: Get all changed decisions/ files + id: changed_decisions_files + uses: tj-actions/changed-files@v47 + with: + files: | + decisions/** + - name: Docker Meta (Cortex Decisions) + if: steps.changed_decisions_files.outputs.all_changed_files != '' + id: meta_cortex_decisions + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ github.repository }}-decisions-operator + tags: | + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=sha + latest + - name: Build and Push Cortex Decisions Operator + if: steps.changed_decisions_files.outputs.all_changed_files != '' + id: push_cortex_decisions + uses: docker/build-push-action@v6 + with: + context: . + file: Dockerfile.kubebuilder + platforms: linux/amd64,linux/arm64 + push: true + tags: ${{ steps.meta_cortex_decisions.outputs.tags }} + labels: ${{ steps.meta_cortex_decisions.outputs.labels }} + build-args: | + GO_MOD_PATH=decisions + GIT_TAG=${{ github.ref_name }} + GIT_COMMIT=${{ github.sha }} + - name: Generate Artifact Attestation for Cortex Decisions + if: steps.changed_decisions_files.outputs.all_changed_files != '' + uses: actions/attest-build-provenance@v3 + with: + subject-name: ${{ env.REGISTRY }}/${{ github.repository }}-decisions-operator + subject-digest: ${{ steps.push_cortex_decisions.outputs.digest }} + push-to-registry: true diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 9888dff8..79833951 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -25,6 +25,8 @@ jobs: go test -v ./... echo "Testing reservations module..." cd reservations && go test -v ./... + echo "Testing decisions module..." + cd ../decisions && go test -v ./... test-with-docker: # We don't need to run this longer test if the previous one already failed. @@ -61,6 +63,14 @@ jobs: -coverprofile=reservations_profile.cov ./internal/... go tool cover -func reservations_profile.cov > reservations_func_coverage.txt cd .. 
+ + echo "Running tests for decisions module..." + cd decisions + go test -v \ + -coverpkg=./internal/... \ + -coverprofile=decisions_profile.cov ./internal/... + go tool cover -func decisions_profile.cov > decisions_func_coverage.txt + cd .. - name: Upload coverage files uses: actions/upload-artifact@v4 with: @@ -68,6 +78,7 @@ jobs: path: | pr_func_coverage.txt reservations/reservations_func_coverage.txt + decisions/decisions_func_coverage.txt # Steps below are only executed if the workflow is triggered by a pull request - name: Delete old coverage comments (PR only) if: ${{ github.event_name == 'pull_request' }} @@ -123,6 +134,19 @@ jobs: reservationsCoverageReport = 'No coverage data available'; } + // Read decisions module coverage report + let decisionsCoverageReport = ''; + let decisionsCoveragePercentage = 'unknown'; + try { + decisionsCoverageReport = fs.readFileSync('decisions/decisions_func_coverage.txt', 'utf8'); + const decisionsLines = decisionsCoverageReport.trim().split('\n'); + const decisionsLastLine = decisionsLines[decisionsLines.length - 1]; + const decisionsCoverageMatch = decisionsLastLine.match(/total:\s+\(statements\)\s+(\d+\.\d+)%/); + decisionsCoveragePercentage = decisionsCoverageMatch ? decisionsCoverageMatch[1] : 'unknown'; + } catch (error) { + decisionsCoverageReport = 'No coverage data available'; + } + let commentBody = '\n'; commentBody += '## Test Coverage Report\n\n'; @@ -144,6 +168,16 @@ jobs: commentBody += '```text\n'; commentBody += reservationsCoverageReport; commentBody += '```\n'; + commentBody += '\n\n'; + + // Decisions module coverage + commentBody += '
\n'; + commentBody += 'Coverage in decisions module (decisions/internal/): '; + commentBody += decisionsCoveragePercentage; + commentBody += '%\n\n'; + commentBody += '```text\n'; + commentBody += decisionsCoverageReport; + commentBody += '```\n'; commentBody += '
\n'; // Post the comment diff --git a/.github/workflows/update-appversion.yml b/.github/workflows/update-appversion.yml index f11e8980..dd4c3c51 100644 --- a/.github/workflows/update-appversion.yml +++ b/.github/workflows/update-appversion.yml @@ -30,6 +30,13 @@ jobs: files: | postgres/** + - name: Get all changed decisions/ files + id: changed_decisions_files + uses: tj-actions/changed-files@v47 + with: + files: | + decisions/** + # Always bumped - name: Update appVersion in cortex-core Chart.yaml run: | @@ -69,3 +76,17 @@ jobs: git add reservations/dist/chart/Chart.yaml git commit -m "Bump cortex-reservations chart appVersions to ${{ steps.vars.outputs.sha }} [skip ci]" || echo "No changes to commit" git push origin HEAD:main + + # Only bumped if there are changes in the decisions directory. + - name: Update appVersion in cortex-decisions Chart.yaml + if: steps.changed_decisions_files.outputs.all_changed_files != '' + run: | + sed -i 's/^\([ ]*appVersion:[ ]*\).*/\1"${{ steps.vars.outputs.sha }}"/' decisions/dist/chart/Chart.yaml + - name: Commit and push changes for cortex-decisions + if: steps.changed_decisions_files.outputs.all_changed_files != '' + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add decisions/dist/chart/Chart.yaml + git commit -m "Bump cortex-decisions chart appVersions to ${{ steps.vars.outputs.sha }} [skip ci]" || echo "No changes to commit" + git push origin HEAD:main diff --git a/Tiltfile b/Tiltfile index 9d0fec1a..d0ada3c1 100644 --- a/Tiltfile +++ b/Tiltfile @@ -37,12 +37,22 @@ def kubebuilder_binary_files(path): docker_build('ghcr.io/cobaltcore-dev/cortex-reservations-operator', '.', dockerfile='Dockerfile.kubebuilder', build_args={'GO_MOD_PATH': 'reservations'}, - only=kubebuilder_binary_files('reservations') + ['internal/', 'go.mod', 'go.sum'], + only=kubebuilder_binary_files('reservations') + ['internal/', 'decisions/', 'go.mod', 'go.sum'], ) local('sh 
helm/sync.sh reservations/dist/chart') k8s_yaml(helm('reservations/dist/chart', name='cortex-reservations', values=[tilt_values])) k8s_resource('reservations-controller-manager', labels=['Reservations']) +########### Decisions Operator & CRDs +docker_build('ghcr.io/cobaltcore-dev/cortex-decisions-operator', '.', + dockerfile='Dockerfile.kubebuilder', + build_args={'GO_MOD_PATH': 'decisions'}, + only=kubebuilder_binary_files('decisions') + ['internal/', 'go.mod', 'go.sum'], +) +local('sh helm/sync.sh decisions/dist/chart') +k8s_yaml(helm('decisions/dist/chart', name='cortex-decisions', values=[tilt_values])) +k8s_resource('decisions-controller-manager', labels=['Decisions']) + ########### Dev Dependencies local('sh helm/sync.sh helm/dev/cortex-prometheus-operator') k8s_yaml(helm('./helm/dev/cortex-prometheus-operator', name='cortex-prometheus-operator')) # Operator @@ -82,6 +92,7 @@ k8s_resource('cortex-plutono', port_forwards=[ docker_build('ghcr.io/cobaltcore-dev/cortex', '.', only=[ 'internal/', 'commands/', 'main.go', 'go.mod', 'go.sum', 'Makefile', 'reservations/api/', # API module of the reservations operator needed for the scheduler. + 'decisions/api/', # API module of the decisions operator needed for the scheduler. 
]) docker_build('ghcr.io/cobaltcore-dev/cortex-postgres', 'postgres') diff --git a/commands/checks/nova/checks.go b/commands/checks/nova/checks.go index a0d8916f..11027589 100644 --- a/commands/checks/nova/checks.go +++ b/commands/checks/nova/checks.go @@ -271,6 +271,7 @@ func randomRequest(dc datacenter, seed int) api.ExternalSchedulerRequest { slog.Info("using flavor extra specs", "extraSpecs", extraSpecs) request := api.ExternalSchedulerRequest{ Spec: api.NovaObject[api.NovaSpec]{Data: api.NovaSpec{ + InstanceUUID: "cortex-e2e-tests", AvailabilityZone: az, ProjectID: project.ID, Flavor: api.NovaObject[api.NovaFlavor]{Data: api.NovaFlavor{ diff --git a/decisions/LICENSE b/decisions/LICENSE new file mode 100644 index 00000000..06c1fb23 --- /dev/null +++ b/decisions/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2024 SAP SE or an SAP affiliate company and cobaltcore-dev contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/decisions/Makefile b/decisions/Makefile new file mode 100644 index 00000000..4d454b5b --- /dev/null +++ b/decisions/Makefile @@ -0,0 +1,58 @@ +.PHONY: all +all: build + +.PHONY: manifests +manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects. + $(CONTROLLER_GEN) rbac:roleName=manager-role crd:allowDangerousTypes=true webhook paths="./..." output:crd:artifacts:config=config/crd/bases + +.PHONY: generate +generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations. + $(CONTROLLER_GEN) crd:allowDangerousTypes=true object:headerFile="hack/boilerplate.go.txt" paths="./..." + +.PHONY: cleanup +cleanup: + rm -rf ./.github + +.PHONY: dekustomize +dekustomize: + kubebuilder edit --plugins=helm/v1-alpha + +##@ Build + +.PHONY: build +build: manifests generate dekustomize cleanup + +LOCALBIN ?= $(shell pwd)/bin +$(LOCALBIN): + mkdir -p $(LOCALBIN) +CONTROLLER_GEN ?= $(LOCALBIN)/controller-gen + +CONTROLLER_TOOLS_VERSION ?= v0.17.2 + +.PHONY: controller-gen +controller-gen: $(CONTROLLER_GEN) ## Download controller-gen locally if necessary. 
+$(CONTROLLER_GEN): $(LOCALBIN) + $(call go-install-tool,$(CONTROLLER_GEN),sigs.k8s.io/controller-tools/cmd/controller-gen,$(CONTROLLER_TOOLS_VERSION)) + +# Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set) +ifeq (,$(shell go env GOBIN)) +GOBIN=$(shell go env GOPATH)/bin +else +GOBIN=$(shell go env GOBIN) +endif + +# go-install-tool will 'go install' any package with custom target and name of binary, if it doesn't exist +# $1 - target path with name of binary +# $2 - package url which can be installed +# $3 - specific version of package +define go-install-tool +@[ -f "$(1)-$(3)" ] || { \ +set -e; \ +package=$(2)@$(3) ;\ +echo "Downloading $${package}" ;\ +rm -f $(1) || true ;\ +GOBIN=$(LOCALBIN) go install $${package} ;\ +mv $(1) $(1)-$(3) ;\ +} ;\ +ln -sf $(1)-$(3) $(1) +endef \ No newline at end of file diff --git a/decisions/PROJECT b/decisions/PROJECT new file mode 100644 index 00000000..edd787f2 --- /dev/null +++ b/decisions/PROJECT @@ -0,0 +1,22 @@ +# Code generated by tool. DO NOT EDIT. +# This file is used to track the info used to scaffold your project +# and allow the plugins properly work. +# More info: https://book.kubebuilder.io/reference/project-config.html +cliVersion: 4.7.1 +domain: cortex +layout: +- go.kubebuilder.io/v4 +plugins: + helm.kubebuilder.io/v1-alpha: {} +projectName: decisions +repo: github.com/cobaltcore-dev/cortex/decisions +resources: +- api: + crdVersion: v1 + controller: true + domain: cortex + group: decisions + kind: SchedulingDecision + path: github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1 + version: v1alpha1 +version: "3" diff --git a/decisions/api/LICENSE b/decisions/api/LICENSE new file mode 100644 index 00000000..06c1fb23 --- /dev/null +++ b/decisions/api/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2024 SAP SE or an SAP affiliate company and cobaltcore-dev contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/decisions/api/go.mod b/decisions/api/go.mod new file mode 100644 index 00000000..db86f9fe --- /dev/null +++ b/decisions/api/go.mod @@ -0,0 +1,27 @@ +module github.com/cobaltcore-dev/cortex/decisions/api + +go 1.25.0 + +require ( + k8s.io/apimachinery v0.34.1 + sigs.k8s.io/controller-runtime v0.22.1 +) + +require ( + github.com/fxamacker/cbor/v2 v2.9.0 // indirect + github.com/go-logr/logr v1.4.2 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect + github.com/x448/float16 v0.8.4 // indirect + go.yaml.in/yaml/v2 v2.4.2 // indirect + golang.org/x/net v0.38.0 // indirect + golang.org/x/text v0.23.0 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + k8s.io/klog/v2 v2.130.1 // indirect + k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 // indirect + sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect + sigs.k8s.io/randfill v1.0.0 // indirect + sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect +) diff --git a/decisions/api/go.sum b/decisions/api/go.sum new file mode 100644 index 00000000..edd5a267 --- /dev/null +++ b/decisions/api/go.sum @@ -0,0 +1,101 @@ +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM= +github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-task/slim-sprig/v3 v3.0.0 
h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgYQBbFN4U4JNXUNYpxael3UzMyo= +github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/onsi/ginkgo/v2 v2.22.0 h1:Yed107/8DjTr0lKCNt7Dn8yQ6ybuDRQoMGrNFKzMfHg= +github.com/onsi/ginkgo/v2 v2.22.0/go.mod 
h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo= +github.com/onsi/gomega v1.36.1 h1:bJDPBO7ibjxcbHMgSCoo4Yj18UWbKDlLwX1x9sybDcw= +github.com/onsi/gomega v1.36.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= +github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI= +go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod 
h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8= +golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik= +golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY= +golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= 
+golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ= +golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +k8s.io/api v0.34.0 h1:L+JtP2wDbEYPUeNGbeSa/5GwFtIA662EmT2YSLOkAVE= +k8s.io/api v0.34.0/go.mod h1:YzgkIzOOlhl9uwWCZNqpw6RJy9L2FK4dlJeayUoydug= +k8s.io/apimachinery v0.34.1 h1:dTlxFls/eikpJxmAC7MVE8oOeP1zryV7iRyIjB0gky4= +k8s.io/apimachinery v0.34.1/go.mod h1:/GwIlEcWuTX9zKIg2mbw0LRFIsXwrfoVxn+ef0X13lw= +k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= +k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= +k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 h1:hwvWFiBzdWw1FhfY1FooPn3kzWuJ8tmbZBHi4zVsl1Y= +k8s.io/utils v0.0.0-20250604170112-4c0f3b243397/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/controller-runtime v0.22.1 h1:Ah1T7I+0A7ize291nJZdS1CabF/lB4E++WizgV24Eqg= 
+sigs.k8s.io/controller-runtime v0.22.1/go.mod h1:FwiwRjkRPbiN+zp2QRp7wlTCzbUXxZ/D4OzuQUDwBHY=
+sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE=
+sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg=
+sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU=
+sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
+sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco=
+sigs.k8s.io/structured-merge-diff/v6 v6.3.0/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE=
+sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs=
+sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4=
diff --git a/decisions/api/v1alpha1/groupversion_info.go b/decisions/api/v1alpha1/groupversion_info.go
new file mode 100644
index 00000000..8d38e963
--- /dev/null
+++ b/decisions/api/v1alpha1/groupversion_info.go
@@ -0,0 +1,23 @@
+// Copyright 2025 SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+// Package v1alpha1 contains API Schema definitions for the decisions v1alpha1 API group.
+// +kubebuilder:object:generate=true
+// +groupName=decisions.cortex
+package v1alpha1
+
+import (
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"sigs.k8s.io/controller-runtime/pkg/scheme"
+)
+
+var (
+	// GroupVersion is group version used to register these objects.
+	GroupVersion = schema.GroupVersion{Group: "decisions.cortex", Version: "v1alpha1"}
+
+	// SchemeBuilder is used to add go types to the GroupVersionKind scheme.
+	SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}
+
+	// AddToScheme adds the types in this group-version to the given scheme.
+	AddToScheme = SchemeBuilder.AddToScheme
+)
diff --git a/decisions/api/v1alpha1/schedulingdecision_types.go b/decisions/api/v1alpha1/schedulingdecision_types.go
new file mode 100644
index 00000000..3eb62ef3
--- /dev/null
+++ b/decisions/api/v1alpha1/schedulingdecision_types.go
@@ -0,0 +1,119 @@
+// Copyright 2025 SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package v1alpha1
+
+import (
+	"k8s.io/apimachinery/pkg/api/resource"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+type SchedulingEventType string // String type of SchedulingDecisionRequest.EventType; the values declared in this file follow.
+
+const ( // NOTE(review): cold-migration and evacuation are commented out below — presumably not supported yet; confirm.
+	SchedulingEventTypeLiveMigration SchedulingEventType = "live-migration"
+	// SchedulingEventTypeColdMigration SchedulingEventType = "cold-migration"
+	// SchedulingEventTypeEvacuation SchedulingEventType = "evacuation"
+	SchedulingEventTypeResize           SchedulingEventType = "resize"
+	SchedulingEventTypeInitialPlacement SchedulingEventType = "initial-placement"
+)
+
+type SchedulingDecisionPipelineOutputSpec struct { // A step name plus a map of float64 activations for that step.
+	Step        string             `json:"step"`
+	Activations map[string]float64 `json:"activations,omitempty"` // presumably keyed by host name — TODO confirm
+}
+
+type SchedulingDecisionPipelineSpec struct { // A pipeline name plus a list of per-step outputs.
+	Name    string                                 `json:"name"`
+	Outputs []SchedulingDecisionPipelineOutputSpec `json:"outputs,omitempty"`
+}
+
+type Flavor struct { // A flavor name plus a map of resource quantities.
+	Name      string                       `json:"name"`
+	Resources map[string]resource.Quantity `json:"requests,omitempty"` // NOTE: serialized under the JSON key "requests", not "resources".
+}
+
+// SchedulingDecisionSpec defines the desired state of SchedulingDecision.
+type SchedulingDecisionSpec struct {
+	Decisions []SchedulingDecisionRequest `json:"decisions"` // List of scheduling decisions to be processed.
+}
+
+type SchedulingDecisionRequest struct { // Element type of SchedulingDecisionSpec.Decisions.
+	ID          string                         `json:"id"`
+	RequestedAt metav1.Time                    `json:"requestedAt"`
+	EventType   SchedulingEventType            `json:"eventType"`
+	Input       map[string]float64             `json:"input,omitempty"`
+	Pipeline    SchedulingDecisionPipelineSpec `json:"pipeline"`
+
+	AvailabilityZone string `json:"availabilityZone,omitempty"`
+
+	Flavor Flavor `json:"flavor,omitempty"` // NOTE(review): encoding/json does not treat a zero struct as empty, so omitempty will not drop this field from marshalled output.
+}
+
+type SchedulingDecisionState string // String type of SchedulingDecisionStatus.State; the values declared in this file follow.
+
+const (
+	SchedulingDecisionStateResolved SchedulingDecisionState = "resolved"
+	SchedulingDecisionStateError    SchedulingDecisionState = "error"
+)
+
+// SchedulingDecisionResult represents the result of processing a single decision request.
+type SchedulingDecisionResult struct {
+	ID          string `json:"id"`
+	Description string `json:"description,omitempty"`
+	// Final scores for each host after processing all pipeline steps.
+	FinalScores map[string]float64 `json:"finalScores,omitempty"`
+	// Hosts that were deleted during pipeline processing and all steps that attempted to delete them.
+	DeletedHosts map[string][]string `json:"deletedHosts,omitempty"`
+}
+
+// SchedulingDecisionStatus defines the observed state of SchedulingDecision.
+type SchedulingDecisionStatus struct {
+	State SchedulingDecisionState `json:"state,omitempty"` // shown in the "State" print column
+	Error string                  `json:"error,omitempty"` // shown in the "Error" print column
+
+	DecisionCount     int    `json:"decisionCount,omitempty"`     // shown in the "Decisions" print column
+	GlobalDescription string `json:"globalDescription,omitempty"` // shown in the "Description" print column
+
+	Results []SchedulingDecisionResult `json:"results,omitempty"` // presumably one entry per spec.decisions item, matched by ID — TODO confirm
+}
+
+// +kubebuilder:object:root=true
+// +kubebuilder:subresource:status
+// +kubebuilder:resource:scope=Cluster,shortName=sdec;sdecs
+// +kubebuilder:printcolumn:name="State",type="string",JSONPath=".status.state"
+// +kubebuilder:printcolumn:name="Error",type="string",JSONPath=".status.error"
+// +kubebuilder:printcolumn:name="Created",type="date",JSONPath=".metadata.creationTimestamp"
+// +kubebuilder:printcolumn:name="Decisions",type="integer",JSONPath=".status.decisionCount"
+// +kubebuilder:printcolumn:name="Latest Event",type="string",JSONPath=".spec.decisions[-1].eventType"
+// +kubebuilder:printcolumn:name="Description",type="string",JSONPath=".status.globalDescription"
+
+// SchedulingDecision is the Schema for the schedulingdecisions API
+type SchedulingDecision struct {
+	metav1.TypeMeta `json:",inline"`
+
+	// metadata is a standard object metadata
+	// +optional
+	metav1.ObjectMeta `json:"metadata,omitempty,omitzero"`
+
+	// spec defines the desired state of SchedulingDecision
+	// +required
+	Spec SchedulingDecisionSpec `json:"spec"`
+
+	// status defines the observed state of SchedulingDecision
+	// +optional
+	Status SchedulingDecisionStatus `json:"status,omitempty,omitzero"`
+}
+
+// +kubebuilder:object:root=true
+
+// SchedulingDecisionList contains a list of SchedulingDecision
+type SchedulingDecisionList struct {
+	metav1.TypeMeta `json:",inline"`
+	metav1.ListMeta `json:"metadata,omitempty"`
+	Items           []SchedulingDecision `json:"items"`
+}
+
+func init() { // Registers SchedulingDecision and SchedulingDecisionList with this package's SchemeBuilder.
+	SchemeBuilder.Register(&SchedulingDecision{}, &SchedulingDecisionList{})
+}
diff --git a/decisions/api/v1alpha1/zz_generated.deepcopy.go
b/decisions/api/v1alpha1/zz_generated.deepcopy.go new file mode 100644 index 00000000..2b061852 --- /dev/null +++ b/decisions/api/v1alpha1/zz_generated.deepcopy.go @@ -0,0 +1,245 @@ +//go:build !ignore_autogenerated + +// Copyright 2025 SAP SE +// SPDX-License-Identifier: Apache-2.0 + +// Code generated by controller-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + "k8s.io/apimachinery/pkg/api/resource" + runtime "k8s.io/apimachinery/pkg/runtime" +) + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Flavor) DeepCopyInto(out *Flavor) { + *out = *in + if in.Resources != nil { + in, out := &in.Resources, &out.Resources + *out = make(map[string]resource.Quantity, len(*in)) + for key, val := range *in { + (*out)[key] = val.DeepCopy() + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Flavor. +func (in *Flavor) DeepCopy() *Flavor { + if in == nil { + return nil + } + out := new(Flavor) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *SchedulingDecision) DeepCopyInto(out *SchedulingDecision) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingDecision. +func (in *SchedulingDecision) DeepCopy() *SchedulingDecision { + if in == nil { + return nil + } + out := new(SchedulingDecision) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
+func (in *SchedulingDecision) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *SchedulingDecisionList) DeepCopyInto(out *SchedulingDecisionList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]SchedulingDecision, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingDecisionList. +func (in *SchedulingDecisionList) DeepCopy() *SchedulingDecisionList { + if in == nil { + return nil + } + out := new(SchedulingDecisionList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *SchedulingDecisionList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *SchedulingDecisionPipelineOutputSpec) DeepCopyInto(out *SchedulingDecisionPipelineOutputSpec) { + *out = *in + if in.Activations != nil { + in, out := &in.Activations, &out.Activations + *out = make(map[string]float64, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingDecisionPipelineOutputSpec. 
+func (in *SchedulingDecisionPipelineOutputSpec) DeepCopy() *SchedulingDecisionPipelineOutputSpec { + if in == nil { + return nil + } + out := new(SchedulingDecisionPipelineOutputSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *SchedulingDecisionPipelineSpec) DeepCopyInto(out *SchedulingDecisionPipelineSpec) { + *out = *in + if in.Outputs != nil { + in, out := &in.Outputs, &out.Outputs + *out = make([]SchedulingDecisionPipelineOutputSpec, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingDecisionPipelineSpec. +func (in *SchedulingDecisionPipelineSpec) DeepCopy() *SchedulingDecisionPipelineSpec { + if in == nil { + return nil + } + out := new(SchedulingDecisionPipelineSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *SchedulingDecisionRequest) DeepCopyInto(out *SchedulingDecisionRequest) { + *out = *in + in.RequestedAt.DeepCopyInto(&out.RequestedAt) + if in.Input != nil { + in, out := &in.Input, &out.Input + *out = make(map[string]float64, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + in.Pipeline.DeepCopyInto(&out.Pipeline) + in.Flavor.DeepCopyInto(&out.Flavor) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingDecisionRequest. +func (in *SchedulingDecisionRequest) DeepCopy() *SchedulingDecisionRequest { + if in == nil { + return nil + } + out := new(SchedulingDecisionRequest) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *SchedulingDecisionResult) DeepCopyInto(out *SchedulingDecisionResult) { + *out = *in + if in.FinalScores != nil { + in, out := &in.FinalScores, &out.FinalScores + *out = make(map[string]float64, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.DeletedHosts != nil { + in, out := &in.DeletedHosts, &out.DeletedHosts + *out = make(map[string][]string, len(*in)) + for key, val := range *in { + var outVal []string + if val == nil { + (*out)[key] = nil + } else { + inVal := (*in)[key] + in, out := &inVal, &outVal + *out = make([]string, len(*in)) + copy(*out, *in) + } + (*out)[key] = outVal + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingDecisionResult. +func (in *SchedulingDecisionResult) DeepCopy() *SchedulingDecisionResult { + if in == nil { + return nil + } + out := new(SchedulingDecisionResult) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *SchedulingDecisionSpec) DeepCopyInto(out *SchedulingDecisionSpec) { + *out = *in + if in.Decisions != nil { + in, out := &in.Decisions, &out.Decisions + *out = make([]SchedulingDecisionRequest, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingDecisionSpec. +func (in *SchedulingDecisionSpec) DeepCopy() *SchedulingDecisionSpec { + if in == nil { + return nil + } + out := new(SchedulingDecisionSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *SchedulingDecisionStatus) DeepCopyInto(out *SchedulingDecisionStatus) { + *out = *in + if in.Results != nil { + in, out := &in.Results, &out.Results + *out = make([]SchedulingDecisionResult, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingDecisionStatus. +func (in *SchedulingDecisionStatus) DeepCopy() *SchedulingDecisionStatus { + if in == nil { + return nil + } + out := new(SchedulingDecisionStatus) + in.DeepCopyInto(out) + return out +} diff --git a/decisions/cmd/main.go b/decisions/cmd/main.go new file mode 100644 index 00000000..6298e6d4 --- /dev/null +++ b/decisions/cmd/main.go @@ -0,0 +1,242 @@ +// Copyright 2025 SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "crypto/tls" + "flag" + "os" + "path/filepath" + + // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) + // to ensure that exec-entrypoint and run can make use of them. 
+ _ "k8s.io/client-go/plugin/pkg/client/auth" + + "k8s.io/apimachinery/pkg/runtime" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/certwatcher" + "sigs.k8s.io/controller-runtime/pkg/healthz" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + "sigs.k8s.io/controller-runtime/pkg/metrics/filters" + metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" + "sigs.k8s.io/controller-runtime/pkg/webhook" + + decisionsv1alpha1 "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1" + "github.com/cobaltcore-dev/cortex/decisions/internal/controller" + "github.com/cobaltcore-dev/cortex/internal/conf" + // +kubebuilder:scaffold:imports +) + +var ( + scheme = runtime.NewScheme() + setupLog = ctrl.Log.WithName("setup") +) + +func init() { + utilruntime.Must(clientgoscheme.AddToScheme(scheme)) + + utilruntime.Must(decisionsv1alpha1.AddToScheme(scheme)) + // +kubebuilder:scaffold:scheme +} + +// nolint:gocyclo +func main() { + var metricsAddr string + var metricsCertPath, metricsCertName, metricsCertKey string + var webhookCertPath, webhookCertName, webhookCertKey string + var enableLeaderElection bool + var probeAddr string + var secureMetrics bool + var enableHTTP2 bool + var tlsOpts []func(*tls.Config) + flag.StringVar(&metricsAddr, "metrics-bind-address", "0", "The address the metrics endpoint binds to. "+ + "Use :8443 for HTTPS or :8080 for HTTP, or leave as 0 to disable the metrics service.") + flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.") + flag.BoolVar(&enableLeaderElection, "leader-elect", false, + "Enable leader election for controller manager. "+ + "Enabling this will ensure there is only one active controller manager.") + flag.BoolVar(&secureMetrics, "metrics-secure", true, + "If set, the metrics endpoint is served securely via HTTPS. 
Use --metrics-secure=false to use HTTP instead.") + flag.StringVar(&webhookCertPath, "webhook-cert-path", "", "The directory that contains the webhook certificate.") + flag.StringVar(&webhookCertName, "webhook-cert-name", "tls.crt", "The name of the webhook certificate file.") + flag.StringVar(&webhookCertKey, "webhook-cert-key", "tls.key", "The name of the webhook key file.") + flag.StringVar(&metricsCertPath, "metrics-cert-path", "", + "The directory that contains the metrics server certificate.") + flag.StringVar(&metricsCertName, "metrics-cert-name", "tls.crt", "The name of the metrics server certificate file.") + flag.StringVar(&metricsCertKey, "metrics-cert-key", "tls.key", "The name of the metrics server key file.") + flag.BoolVar(&enableHTTP2, "enable-http2", false, + "If set, HTTP/2 will be enabled for the metrics and webhook servers") + opts := zap.Options{ + Development: true, + } + opts.BindFlags(flag.CommandLine) + flag.Parse() + + ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts))) + + // if the enable-http2 flag is false (the default), http/2 should be disabled + // due to its vulnerabilities. More specifically, disabling http/2 will + // prevent from being vulnerable to the HTTP/2 Stream Cancellation and + // Rapid Reset CVEs. 
For more information see: + // - https://github.com/advisories/GHSA-qppj-fm5r-hxr3 + // - https://github.com/advisories/GHSA-4374-p667-p6c8 + disableHTTP2 := func(c *tls.Config) { + setupLog.Info("disabling http/2") + c.NextProtos = []string{"http/1.1"} + } + + if !enableHTTP2 { + tlsOpts = append(tlsOpts, disableHTTP2) + } + + // Create watchers for metrics and webhooks certificates + var metricsCertWatcher, webhookCertWatcher *certwatcher.CertWatcher + + // Initial webhook TLS options + webhookTLSOpts := tlsOpts + + if len(webhookCertPath) > 0 { + setupLog.Info("Initializing webhook certificate watcher using provided certificates", + "webhook-cert-path", webhookCertPath, "webhook-cert-name", webhookCertName, "webhook-cert-key", webhookCertKey) + + var err error + webhookCertWatcher, err = certwatcher.New( + filepath.Join(webhookCertPath, webhookCertName), + filepath.Join(webhookCertPath, webhookCertKey), + ) + if err != nil { + setupLog.Error(err, "Failed to initialize webhook certificate watcher") + os.Exit(1) + } + + webhookTLSOpts = append(webhookTLSOpts, func(config *tls.Config) { + config.GetCertificate = webhookCertWatcher.GetCertificate + }) + } + + webhookServer := webhook.NewServer(webhook.Options{ + TLSOpts: webhookTLSOpts, + }) + + // Metrics endpoint is enabled in 'config/default/kustomization.yaml'. The Metrics options configure the server. + // More info: + // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.21.0/pkg/metrics/server + // - https://book.kubebuilder.io/reference/metrics.html + metricsServerOptions := metricsserver.Options{ + BindAddress: metricsAddr, + SecureServing: secureMetrics, + TLSOpts: tlsOpts, + } + + if secureMetrics { + // FilterProvider is used to protect the metrics endpoint with authn/authz. + // These configurations ensure that only authorized users and service accounts + // can access the metrics endpoint. The RBAC are configured in 'config/rbac/kustomization.yaml'. 
More info: + // https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.21.0/pkg/metrics/filters#WithAuthenticationAndAuthorization + metricsServerOptions.FilterProvider = filters.WithAuthenticationAndAuthorization + } + + // If the certificate is not specified, controller-runtime will automatically + // generate self-signed certificates for the metrics server. While convenient for development and testing, + // this setup is not recommended for production. + // + // If you enable certManager, uncomment the following lines: + // - [METRICS-WITH-CERTS] at config/default/kustomization.yaml to generate and use certificates + // managed by cert-manager for the metrics server. + // - [PROMETHEUS-WITH-CERTS] at config/prometheus/kustomization.yaml for TLS certification. + if len(metricsCertPath) > 0 { + setupLog.Info("Initializing metrics certificate watcher using provided certificates", + "metrics-cert-path", metricsCertPath, "metrics-cert-name", metricsCertName, "metrics-cert-key", metricsCertKey) + + var err error + metricsCertWatcher, err = certwatcher.New( + filepath.Join(metricsCertPath, metricsCertName), + filepath.Join(metricsCertPath, metricsCertKey), + ) + if err != nil { + setupLog.Error(err, "to initialize metrics certificate watcher", "error", err) + os.Exit(1) + } + + metricsServerOptions.TLSOpts = append(metricsServerOptions.TLSOpts, func(config *tls.Config) { + config.GetCertificate = metricsCertWatcher.GetCertificate + }) + } + + mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ + Scheme: scheme, + Metrics: metricsServerOptions, + WebhookServer: webhookServer, + HealthProbeBindAddress: probeAddr, + LeaderElection: enableLeaderElection, + LeaderElectionID: "6fb26449.cortex", + // LeaderElectionReleaseOnCancel defines if the leader should step down voluntarily + // when the Manager ends. This requires the binary to immediately end when the + // Manager is stopped, otherwise, this setting is unsafe. 
Setting this significantly + // speeds up voluntary leader transitions as the new leader don't have to wait + // LeaseDuration time first. + // + // In the default scaffold provided, the program ends immediately after + // the manager stops, so would be fine to enable this option. However, + // if you are doing or is intended to do any operation such as perform cleanups + // after the manager stops then its usage might be unsafe. + // LeaderElectionReleaseOnCancel: true, + }) + if err != nil { + setupLog.Error(err, "unable to start manager") + os.Exit(1) + } + + if err := (&controller.SchedulingDecisionReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Conf: conf.NewConfig[controller.Config](), + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "SchedulingDecision") + os.Exit(1) + } + + if err := (&controller.SchedulingDecisionTTLController{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Conf: conf.NewConfig[controller.Config](), + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "SchedulingDecisionTTL") + os.Exit(1) + } + // +kubebuilder:scaffold:builder + + if metricsCertWatcher != nil { + setupLog.Info("Adding metrics certificate watcher to manager") + if err := mgr.Add(metricsCertWatcher); err != nil { + setupLog.Error(err, "unable to add metrics certificate watcher to manager") + os.Exit(1) + } + } + + if webhookCertWatcher != nil { + setupLog.Info("Adding webhook certificate watcher to manager") + if err := mgr.Add(webhookCertWatcher); err != nil { + setupLog.Error(err, "unable to add webhook certificate watcher to manager") + os.Exit(1) + } + } + + if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { + setupLog.Error(err, "unable to set up health check") + os.Exit(1) + } + if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil { + setupLog.Error(err, "unable to set up ready check") + os.Exit(1) 
+ } + + setupLog.Info("starting manager") + if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { + setupLog.Error(err, "problem running manager") + os.Exit(1) + } +} diff --git a/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml b/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml new file mode 100644 index 00000000..64061bcb --- /dev/null +++ b/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml @@ -0,0 +1,171 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.17.2 + name: schedulingdecisions.decisions.cortex +spec: + group: decisions.cortex + names: + kind: SchedulingDecision + listKind: SchedulingDecisionList + plural: schedulingdecisions + shortNames: + - sdec + - sdecs + singular: schedulingdecision + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .status.state + name: State + type: string + - jsonPath: .status.error + name: Error + type: string + - jsonPath: .metadata.creationTimestamp + name: Created + type: date + - jsonPath: .status.decisionCount + name: Decisions + type: integer + - jsonPath: .spec.decisions[-1].eventType + name: Latest Event + type: string + - jsonPath: .status.globalDescription + name: Description + type: string + name: v1alpha1 + schema: + openAPIV3Schema: + description: SchedulingDecision is the Schema for the schedulingdecisions + API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. 
+ Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: spec defines the desired state of SchedulingDecision + properties: + decisions: + items: + properties: + availabilityZone: + type: string + eventType: + type: string + flavor: + properties: + name: + type: string + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + required: + - name + type: object + id: + type: string + input: + additionalProperties: + type: number + type: object + pipeline: + properties: + name: + type: string + outputs: + items: + properties: + activations: + additionalProperties: + type: number + type: object + step: + type: string + required: + - step + type: object + type: array + required: + - name + type: object + requestedAt: + format: date-time + type: string + required: + - eventType + - id + - pipeline + - requestedAt + type: object + type: array + required: + - decisions + type: object + status: + description: status defines the observed state of SchedulingDecision + properties: + decisionCount: + type: integer + error: + type: string + globalDescription: + type: string + results: + items: + description: SchedulingDecisionResult represents the result of processing + a single decision request. + properties: + deletedHosts: + additionalProperties: + items: + type: string + type: array + description: Hosts that were deleted during pipeline processing + and all steps that attempted to delete them. 
+ type: object + description: + type: string + finalScores: + additionalProperties: + type: number + description: Final scores for each host after processing all + pipeline steps. + type: object + id: + type: string + required: + - id + type: object + type: array + state: + type: string + type: object + required: + - spec + type: object + served: true + storage: true + subresources: + status: {} diff --git a/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml b/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml new file mode 100644 index 00000000..64061bcb --- /dev/null +++ b/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml @@ -0,0 +1,171 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.17.2 + name: schedulingdecisions.decisions.cortex +spec: + group: decisions.cortex + names: + kind: SchedulingDecision + listKind: SchedulingDecisionList + plural: schedulingdecisions + shortNames: + - sdec + - sdecs + singular: schedulingdecision + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .status.state + name: State + type: string + - jsonPath: .status.error + name: Error + type: string + - jsonPath: .metadata.creationTimestamp + name: Created + type: date + - jsonPath: .status.decisionCount + name: Decisions + type: integer + - jsonPath: .spec.decisions[-1].eventType + name: Latest Event + type: string + - jsonPath: .status.globalDescription + name: Description + type: string + name: v1alpha1 + schema: + openAPIV3Schema: + description: SchedulingDecision is the Schema for the schedulingdecisions + API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: spec defines the desired state of SchedulingDecision + properties: + decisions: + items: + properties: + availabilityZone: + type: string + eventType: + type: string + flavor: + properties: + name: + type: string + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + required: + - name + type: object + id: + type: string + input: + additionalProperties: + type: number + type: object + pipeline: + properties: + name: + type: string + outputs: + items: + properties: + activations: + additionalProperties: + type: number + type: object + step: + type: string + required: + - step + type: object + type: array + required: + - name + type: object + requestedAt: + format: date-time + type: string + required: + - eventType + - id + - pipeline + - requestedAt + type: object + type: array + required: + - decisions + type: object + status: + description: status defines the observed state of SchedulingDecision + properties: + decisionCount: + type: integer + error: + type: string + globalDescription: + type: string + results: + items: + description: SchedulingDecisionResult represents the result of processing + a single decision request. 
+ properties: + deletedHosts: + additionalProperties: + items: + type: string + type: array + description: Hosts that were deleted during pipeline processing + and all steps that attempted to delete them. + type: object + description: + type: string + finalScores: + additionalProperties: + type: number + description: Final scores for each host after processing all + pipeline steps. + type: object + id: + type: string + required: + - id + type: object + type: array + state: + type: string + type: object + required: + - spec + type: object + served: true + storage: true + subresources: + status: {} diff --git a/decisions/config/crd/kustomization.yaml b/decisions/config/crd/kustomization.yaml new file mode 100644 index 00000000..c1caafe2 --- /dev/null +++ b/decisions/config/crd/kustomization.yaml @@ -0,0 +1,16 @@ +# This kustomization.yaml is not intended to be run by itself, +# since it depends on service name and namespace that are out of this kustomize package. +# It should be run by config/default +resources: +- bases/decisions.cortex_schedulingdecisions.yaml +# +kubebuilder:scaffold:crdkustomizeresource + +patches: +# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix. +# patches here are for enabling the conversion webhook for each CRD +# +kubebuilder:scaffold:crdkustomizewebhookpatch + +# [WEBHOOK] To enable webhook, uncomment the following section +# the following config is for teaching kustomize how to do kustomization for CRDs. 
+#configurations: +#- kustomizeconfig.yaml diff --git a/decisions/config/crd/kustomizeconfig.yaml b/decisions/config/crd/kustomizeconfig.yaml new file mode 100644 index 00000000..ec5c150a --- /dev/null +++ b/decisions/config/crd/kustomizeconfig.yaml @@ -0,0 +1,19 @@ +# This file is for teaching kustomize how to substitute name and namespace reference in CRD +nameReference: +- kind: Service + version: v1 + fieldSpecs: + - kind: CustomResourceDefinition + version: v1 + group: apiextensions.k8s.io + path: spec/conversion/webhook/clientConfig/service/name + +namespace: +- kind: CustomResourceDefinition + version: v1 + group: apiextensions.k8s.io + path: spec/conversion/webhook/clientConfig/service/namespace + create: false + +varReference: +- path: metadata/annotations diff --git a/decisions/config/default/cert_metrics_manager_patch.yaml b/decisions/config/default/cert_metrics_manager_patch.yaml new file mode 100644 index 00000000..d9750155 --- /dev/null +++ b/decisions/config/default/cert_metrics_manager_patch.yaml @@ -0,0 +1,30 @@ +# This patch adds the args, volumes, and ports to allow the manager to use the metrics-server certs. 
+ +# Add the volumeMount for the metrics-server certs +- op: add + path: /spec/template/spec/containers/0/volumeMounts/- + value: + mountPath: /tmp/k8s-metrics-server/metrics-certs + name: metrics-certs + readOnly: true + +# Add the --metrics-cert-path argument for the metrics server +- op: add + path: /spec/template/spec/containers/0/args/- + value: --metrics-cert-path=/tmp/k8s-metrics-server/metrics-certs + +# Add the metrics-server certs volume configuration +- op: add + path: /spec/template/spec/volumes/- + value: + name: metrics-certs + secret: + secretName: metrics-server-cert + optional: false + items: + - key: ca.crt + path: ca.crt + - key: tls.crt + path: tls.crt + - key: tls.key + path: tls.key diff --git a/decisions/config/default/kustomization.yaml b/decisions/config/default/kustomization.yaml new file mode 100644 index 00000000..35afcf3b --- /dev/null +++ b/decisions/config/default/kustomization.yaml @@ -0,0 +1,234 @@ +# Adds namespace to all resources. +namespace: cortex-decisions + +# Value of this field is prepended to the +# names of all resources, e.g. a deployment named +# "wordpress" becomes "alices-wordpress". +# Note that it should also match with the prefix (text before '-') of the namespace +# field above. +namePrefix: cortex-decisions- + +# Labels to add to all resources and selectors. +#labels: +#- includeSelectors: true +# pairs: +# someName: someValue + +resources: +- ../crd +- ../rbac +- ../manager +# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in +# crd/kustomization.yaml +#- ../webhook +# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required. +#- ../certmanager +# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. +#- ../prometheus +# [METRICS] Expose the controller manager metrics service. 
+- metrics_service.yaml +# [NETWORK POLICY] Protect the /metrics endpoint and Webhook Server with NetworkPolicy. +# Only Pod(s) running a namespace labeled with 'metrics: enabled' will be able to gather the metrics. +# Only CR(s) which requires webhooks and are applied on namespaces labeled with 'webhooks: enabled' will +# be able to communicate with the Webhook Server. +#- ../network-policy + +# Uncomment the patches line if you enable Metrics +patches: +# [METRICS] The following patch will enable the metrics endpoint using HTTPS and the port :8443. +# More info: https://book.kubebuilder.io/reference/metrics +- path: manager_metrics_patch.yaml + target: + kind: Deployment + +# Uncomment the patches line if you enable Metrics and CertManager +# [METRICS-WITH-CERTS] To enable metrics protected with certManager, uncomment the following line. +# This patch will protect the metrics with certManager self-signed certs. +#- path: cert_metrics_manager_patch.yaml +# target: +# kind: Deployment + +# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in +# crd/kustomization.yaml +#- path: manager_webhook_patch.yaml +# target: +# kind: Deployment + +# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix. +# Uncomment the following replacements to add the cert-manager CA injection annotations +#replacements: +# - source: # Uncomment the following block to enable certificates for metrics +# kind: Service +# version: v1 +# name: controller-manager-metrics-service +# fieldPath: metadata.name +# targets: +# - select: +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: metrics-certs +# fieldPaths: +# - spec.dnsNames.0 +# - spec.dnsNames.1 +# options: +# delimiter: '.' 
+# index: 0 +# create: true +# - select: # Uncomment the following to set the Service name for TLS config in Prometheus ServiceMonitor +# kind: ServiceMonitor +# group: monitoring.coreos.com +# version: v1 +# name: controller-manager-metrics-monitor +# fieldPaths: +# - spec.endpoints.0.tlsConfig.serverName +# options: +# delimiter: '.' +# index: 0 +# create: true + +# - source: +# kind: Service +# version: v1 +# name: controller-manager-metrics-service +# fieldPath: metadata.namespace +# targets: +# - select: +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: metrics-certs +# fieldPaths: +# - spec.dnsNames.0 +# - spec.dnsNames.1 +# options: +# delimiter: '.' +# index: 1 +# create: true +# - select: # Uncomment the following to set the Service namespace for TLS in Prometheus ServiceMonitor +# kind: ServiceMonitor +# group: monitoring.coreos.com +# version: v1 +# name: controller-manager-metrics-monitor +# fieldPaths: +# - spec.endpoints.0.tlsConfig.serverName +# options: +# delimiter: '.' +# index: 1 +# create: true + +# - source: # Uncomment the following block if you have any webhook +# kind: Service +# version: v1 +# name: webhook-service +# fieldPath: .metadata.name # Name of the service +# targets: +# - select: +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: serving-cert +# fieldPaths: +# - .spec.dnsNames.0 +# - .spec.dnsNames.1 +# options: +# delimiter: '.' +# index: 0 +# create: true +# - source: +# kind: Service +# version: v1 +# name: webhook-service +# fieldPath: .metadata.namespace # Namespace of the service +# targets: +# - select: +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: serving-cert +# fieldPaths: +# - .spec.dnsNames.0 +# - .spec.dnsNames.1 +# options: +# delimiter: '.' 
+# index: 1 +# create: true + +# - source: # Uncomment the following block if you have a ValidatingWebhook (--programmatic-validation) +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: serving-cert # This name should match the one in certificate.yaml +# fieldPath: .metadata.namespace # Namespace of the certificate CR +# targets: +# - select: +# kind: ValidatingWebhookConfiguration +# fieldPaths: +# - .metadata.annotations.[cert-manager.io/inject-ca-from] +# options: +# delimiter: '/' +# index: 0 +# create: true +# - source: +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: serving-cert +# fieldPath: .metadata.name +# targets: +# - select: +# kind: ValidatingWebhookConfiguration +# fieldPaths: +# - .metadata.annotations.[cert-manager.io/inject-ca-from] +# options: +# delimiter: '/' +# index: 1 +# create: true + +# - source: # Uncomment the following block if you have a DefaultingWebhook (--defaulting ) +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: serving-cert +# fieldPath: .metadata.namespace # Namespace of the certificate CR +# targets: +# - select: +# kind: MutatingWebhookConfiguration +# fieldPaths: +# - .metadata.annotations.[cert-manager.io/inject-ca-from] +# options: +# delimiter: '/' +# index: 0 +# create: true +# - source: +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: serving-cert +# fieldPath: .metadata.name +# targets: +# - select: +# kind: MutatingWebhookConfiguration +# fieldPaths: +# - .metadata.annotations.[cert-manager.io/inject-ca-from] +# options: +# delimiter: '/' +# index: 1 +# create: true + +# - source: # Uncomment the following block if you have a ConversionWebhook (--conversion) +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: serving-cert +# fieldPath: .metadata.namespace # Namespace of the certificate CR +# targets: # Do not remove or uncomment the following scaffold marker; required to generate code for target CRD. 
+# +kubebuilder:scaffold:crdkustomizecainjectionns +# - source: +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: serving-cert +# fieldPath: .metadata.name +# targets: # Do not remove or uncomment the following scaffold marker; required to generate code for target CRD. +# +kubebuilder:scaffold:crdkustomizecainjectionname diff --git a/decisions/config/default/manager_metrics_patch.yaml b/decisions/config/default/manager_metrics_patch.yaml new file mode 100644 index 00000000..2aaef653 --- /dev/null +++ b/decisions/config/default/manager_metrics_patch.yaml @@ -0,0 +1,4 @@ +# This patch adds the args to allow exposing the metrics endpoint using HTTPS +- op: add + path: /spec/template/spec/containers/0/args/0 + value: --metrics-bind-address=:8443 diff --git a/decisions/config/default/metrics_service.yaml b/decisions/config/default/metrics_service.yaml new file mode 100644 index 00000000..cd559a2e --- /dev/null +++ b/decisions/config/default/metrics_service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + control-plane: controller-manager + app.kubernetes.io/name: decisions + app.kubernetes.io/managed-by: kustomize + name: controller-manager-metrics-service + namespace: system +spec: + ports: + - name: https + port: 8443 + protocol: TCP + targetPort: 8443 + selector: + control-plane: controller-manager + app.kubernetes.io/name: decisions diff --git a/decisions/config/manager/kustomization.yaml b/decisions/config/manager/kustomization.yaml new file mode 100644 index 00000000..0df5546d --- /dev/null +++ b/decisions/config/manager/kustomization.yaml @@ -0,0 +1,8 @@ +resources: +- manager.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +images: +- name: controller + newName: example.com/decisions + newTag: v0.0.1 diff --git a/decisions/config/manager/manager.yaml b/decisions/config/manager/manager.yaml new file mode 100644 index 00000000..91f9f3a4 --- /dev/null +++ b/decisions/config/manager/manager.yaml 
@@ -0,0 +1,77 @@ +apiVersion: v1 +kind: Namespace +metadata: + labels: + control-plane: controller-manager + app.kubernetes.io/name: decisions + app.kubernetes.io/managed-by: kustomize + name: system +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: controller-manager + namespace: system + labels: + control-plane: controller-manager + app.kubernetes.io/name: decisions + app.kubernetes.io/managed-by: kustomize +spec: + selector: + matchLabels: + control-plane: controller-manager + app.kubernetes.io/name: decisions + replicas: 1 + template: + metadata: + annotations: + kubectl.kubernetes.io/default-container: manager + labels: + control-plane: controller-manager + app.kubernetes.io/name: decisions + spec: + securityContext: + # Projects are configured by default to adhere to the "restricted" Pod Security Standards. + # This ensures that deployments meet the highest security requirements for Kubernetes. + # For more details, see: https://kubernetes.io/docs/concepts/security/pod-security-standards/#restricted + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + containers: + - command: + - /manager + args: + - --leader-elect + - --health-probe-bind-address=:8081 + image: controller:latest + name: manager + ports: [] + securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + capabilities: + drop: + - "ALL" + livenessProbe: + httpGet: + path: /healthz + port: 8081 + initialDelaySeconds: 15 + periodSeconds: 20 + readinessProbe: + httpGet: + path: /readyz + port: 8081 + initialDelaySeconds: 5 + periodSeconds: 10 + resources: + limits: + cpu: 500m + memory: 128Mi + requests: + cpu: 10m + memory: 64Mi + volumeMounts: [] + volumes: [] + serviceAccountName: controller-manager + terminationGracePeriodSeconds: 10 diff --git a/decisions/config/network-policy/allow-metrics-traffic.yaml b/decisions/config/network-policy/allow-metrics-traffic.yaml new file mode 100644 index 00000000..da847f1b --- /dev/null +++ 
b/decisions/config/network-policy/allow-metrics-traffic.yaml @@ -0,0 +1,27 @@ +# This NetworkPolicy allows ingress traffic +# with Pods running on namespaces labeled with 'metrics: enabled'. Only Pods on those +# namespaces are able to gather data from the metrics endpoint. +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + labels: + app.kubernetes.io/name: decisions + app.kubernetes.io/managed-by: kustomize + name: allow-metrics-traffic + namespace: system +spec: + podSelector: + matchLabels: + control-plane: controller-manager + app.kubernetes.io/name: decisions + policyTypes: + - Ingress + ingress: + # This allows ingress traffic from any namespace with the label metrics: enabled + - from: + - namespaceSelector: + matchLabels: + metrics: enabled # Only from namespaces with this label + ports: + - port: 8443 + protocol: TCP diff --git a/decisions/config/network-policy/kustomization.yaml b/decisions/config/network-policy/kustomization.yaml new file mode 100644 index 00000000..ec0fb5e5 --- /dev/null +++ b/decisions/config/network-policy/kustomization.yaml @@ -0,0 +1,2 @@ +resources: +- allow-metrics-traffic.yaml diff --git a/decisions/config/prometheus/kustomization.yaml b/decisions/config/prometheus/kustomization.yaml new file mode 100644 index 00000000..fdc5481b --- /dev/null +++ b/decisions/config/prometheus/kustomization.yaml @@ -0,0 +1,11 @@ +resources: +- monitor.yaml + +# [PROMETHEUS-WITH-CERTS] The following patch configures the ServiceMonitor in ../prometheus +# to securely reference certificates created and managed by cert-manager. +# Additionally, ensure that you uncomment the [METRICS-WITH-CERTS] patch under config/default/kustomization.yaml +# to mount the "metrics-server-cert" secret in the Manager Deployment. 
+#patches: +# - path: monitor_tls_patch.yaml +# target: +# kind: ServiceMonitor diff --git a/decisions/config/prometheus/monitor.yaml b/decisions/config/prometheus/monitor.yaml new file mode 100644 index 00000000..bf0a107e --- /dev/null +++ b/decisions/config/prometheus/monitor.yaml @@ -0,0 +1,27 @@ +# Prometheus Monitor Service (Metrics) +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + control-plane: controller-manager + app.kubernetes.io/name: decisions + app.kubernetes.io/managed-by: kustomize + name: controller-manager-metrics-monitor + namespace: system +spec: + endpoints: + - path: /metrics + port: https # Ensure this is the name of the port that exposes HTTPS metrics + scheme: https + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + tlsConfig: + # The option insecureSkipVerify: true is not recommended for production since it disables + # certificate verification, exposing the system to potential man-in-the-middle attacks. + # For production environments, it is recommended to use cert-manager for automatic TLS certificate management. + # To apply this configuration, enable cert-manager and use the patch located at config/prometheus/monitor_tls_patch.yaml, + # which securely references the certificate from the 'metrics-server-cert' secret. 
+ insecureSkipVerify: true + selector: + matchLabels: + control-plane: controller-manager + app.kubernetes.io/name: decisions diff --git a/decisions/config/prometheus/monitor_tls_patch.yaml b/decisions/config/prometheus/monitor_tls_patch.yaml new file mode 100644 index 00000000..5bf84ce0 --- /dev/null +++ b/decisions/config/prometheus/monitor_tls_patch.yaml @@ -0,0 +1,19 @@ +# Patch for Prometheus ServiceMonitor to enable secure TLS configuration +# using certificates managed by cert-manager +- op: replace + path: /spec/endpoints/0/tlsConfig + value: + # SERVICE_NAME and SERVICE_NAMESPACE will be substituted by kustomize + serverName: SERVICE_NAME.SERVICE_NAMESPACE.svc + insecureSkipVerify: false + ca: + secret: + name: metrics-server-cert + key: ca.crt + cert: + secret: + name: metrics-server-cert + key: tls.crt + keySecret: + name: metrics-server-cert + key: tls.key diff --git a/decisions/config/rbac/kustomization.yaml b/decisions/config/rbac/kustomization.yaml new file mode 100644 index 00000000..9a3976b9 --- /dev/null +++ b/decisions/config/rbac/kustomization.yaml @@ -0,0 +1,28 @@ +resources: +# All RBAC will be applied under this service account in +# the deployment namespace. You may comment out this resource +# if your manager will use a service account that exists at +# runtime. Be sure to update RoleBinding and ClusterRoleBinding +# subjects if changing service account names. +- service_account.yaml +- role.yaml +- role_binding.yaml +- leader_election_role.yaml +- leader_election_role_binding.yaml +# The following RBAC configurations are used to protect +# the metrics endpoint with authn/authz. These configurations +# ensure that only authorized users and service accounts +# can access the metrics endpoint. Comment the following +# permissions if you want to disable this protection. 
+# More info: https://book.kubebuilder.io/reference/metrics.html +- metrics_auth_role.yaml +- metrics_auth_role_binding.yaml +- metrics_reader_role.yaml +# For each CRD, "Admin", "Editor" and "Viewer" roles are scaffolded by +# default, aiding admins in cluster management. Those roles are +# not used by the decisions itself. You can comment the following lines +# if you do not want those helpers be installed with your Project. +- schedulingdecision_admin_role.yaml +- schedulingdecision_editor_role.yaml +- schedulingdecision_viewer_role.yaml + diff --git a/decisions/config/rbac/leader_election_role.yaml b/decisions/config/rbac/leader_election_role.yaml new file mode 100644 index 00000000..3f1f68b5 --- /dev/null +++ b/decisions/config/rbac/leader_election_role.yaml @@ -0,0 +1,40 @@ +# permissions to do leader election. +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + labels: + app.kubernetes.io/name: decisions + app.kubernetes.io/managed-by: kustomize + name: leader-election-role +rules: +- apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch diff --git a/decisions/config/rbac/leader_election_role_binding.yaml b/decisions/config/rbac/leader_election_role_binding.yaml new file mode 100644 index 00000000..1f6f5652 --- /dev/null +++ b/decisions/config/rbac/leader_election_role_binding.yaml @@ -0,0 +1,15 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + app.kubernetes.io/name: decisions + app.kubernetes.io/managed-by: kustomize + name: leader-election-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: leader-election-role +subjects: +- kind: ServiceAccount + name: controller-manager + namespace: system 
diff --git a/decisions/config/rbac/metrics_auth_role.yaml b/decisions/config/rbac/metrics_auth_role.yaml new file mode 100644 index 00000000..32d2e4ec --- /dev/null +++ b/decisions/config/rbac/metrics_auth_role.yaml @@ -0,0 +1,17 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: metrics-auth-role +rules: +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create diff --git a/decisions/config/rbac/metrics_auth_role_binding.yaml b/decisions/config/rbac/metrics_auth_role_binding.yaml new file mode 100644 index 00000000..e775d67f --- /dev/null +++ b/decisions/config/rbac/metrics_auth_role_binding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: metrics-auth-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: metrics-auth-role +subjects: +- kind: ServiceAccount + name: controller-manager + namespace: system diff --git a/decisions/config/rbac/metrics_reader_role.yaml b/decisions/config/rbac/metrics_reader_role.yaml new file mode 100644 index 00000000..51a75db4 --- /dev/null +++ b/decisions/config/rbac/metrics_reader_role.yaml @@ -0,0 +1,9 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: metrics-reader +rules: +- nonResourceURLs: + - "/metrics" + verbs: + - get diff --git a/decisions/config/rbac/role.yaml b/decisions/config/rbac/role.yaml new file mode 100644 index 00000000..ee66f8a1 --- /dev/null +++ b/decisions/config/rbac/role.yaml @@ -0,0 +1,32 @@ +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: manager-role +rules: +- apiGroups: + - decisions.cortex + resources: + - schedulingdecisions + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - decisions.cortex + resources: + - schedulingdecisions/finalizers + 
verbs: + - update +- apiGroups: + - decisions.cortex + resources: + - schedulingdecisions/status + verbs: + - get + - patch + - update diff --git a/decisions/config/rbac/role_binding.yaml b/decisions/config/rbac/role_binding.yaml new file mode 100644 index 00000000..6a27d9e9 --- /dev/null +++ b/decisions/config/rbac/role_binding.yaml @@ -0,0 +1,15 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/name: decisions + app.kubernetes.io/managed-by: kustomize + name: manager-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: manager-role +subjects: +- kind: ServiceAccount + name: controller-manager + namespace: system diff --git a/decisions/config/rbac/schedulingdecision_admin_role.yaml b/decisions/config/rbac/schedulingdecision_admin_role.yaml new file mode 100644 index 00000000..cd8699f9 --- /dev/null +++ b/decisions/config/rbac/schedulingdecision_admin_role.yaml @@ -0,0 +1,27 @@ +# This rule is not used by the project decisions itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants full permissions ('*') over decisions.cortex. +# This role is intended for users authorized to modify roles and bindings within the cluster, +# enabling them to delegate specific permissions to other users or groups as needed. 
+ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: decisions + app.kubernetes.io/managed-by: kustomize + name: schedulingdecision-admin-role +rules: +- apiGroups: + - decisions.cortex + resources: + - schedulingdecisions + verbs: + - '*' +- apiGroups: + - decisions.cortex + resources: + - schedulingdecisions/status + verbs: + - get diff --git a/decisions/config/rbac/schedulingdecision_editor_role.yaml b/decisions/config/rbac/schedulingdecision_editor_role.yaml new file mode 100644 index 00000000..864ce9ad --- /dev/null +++ b/decisions/config/rbac/schedulingdecision_editor_role.yaml @@ -0,0 +1,33 @@ +# This rule is not used by the project decisions itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants permissions to create, update, and delete resources within the decisions.cortex. +# This role is intended for users who need to manage these resources +# but should not control RBAC or manage permissions for others. + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: decisions + app.kubernetes.io/managed-by: kustomize + name: schedulingdecision-editor-role +rules: +- apiGroups: + - decisions.cortex + resources: + - schedulingdecisions + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - decisions.cortex + resources: + - schedulingdecisions/status + verbs: + - get diff --git a/decisions/config/rbac/schedulingdecision_viewer_role.yaml b/decisions/config/rbac/schedulingdecision_viewer_role.yaml new file mode 100644 index 00000000..4d62565e --- /dev/null +++ b/decisions/config/rbac/schedulingdecision_viewer_role.yaml @@ -0,0 +1,29 @@ +# This rule is not used by the project decisions itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants read-only access to decisions.cortex resources. 
+# This role is intended for users who need visibility into these resources +# without permissions to modify them. It is ideal for monitoring purposes and limited-access viewing. + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: decisions + app.kubernetes.io/managed-by: kustomize + name: schedulingdecision-viewer-role +rules: +- apiGroups: + - decisions.cortex + resources: + - schedulingdecisions + verbs: + - get + - list + - watch +- apiGroups: + - decisions.cortex + resources: + - schedulingdecisions/status + verbs: + - get diff --git a/decisions/config/rbac/service_account.yaml b/decisions/config/rbac/service_account.yaml new file mode 100644 index 00000000..1adb8bd8 --- /dev/null +++ b/decisions/config/rbac/service_account.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/name: decisions + app.kubernetes.io/managed-by: kustomize + name: controller-manager + namespace: system diff --git a/decisions/dist/chart/.helmignore b/decisions/dist/chart/.helmignore new file mode 100644 index 00000000..7d92f7fb --- /dev/null +++ b/decisions/dist/chart/.helmignore @@ -0,0 +1,25 @@ +# Patterns to ignore when building Helm packages. 
+# Operating system files +.DS_Store + +# Version control directories +.git/ +.gitignore +.bzr/ +.hg/ +.hgignore +.svn/ + +# Backup and temporary files +*.swp +*.tmp +*.bak +*.orig +*~ + +# IDE and editor-related files +.idea/ +.vscode/ + +# Helm chart artifacts +dist/chart/*.tgz diff --git a/decisions/dist/chart/Chart.lock b/decisions/dist/chart/Chart.lock new file mode 100644 index 00000000..db4c5823 --- /dev/null +++ b/decisions/dist/chart/Chart.lock @@ -0,0 +1,6 @@ +dependencies: +- name: owner-info + repository: oci://ghcr.io/sapcc/helm-charts + version: 1.0.0 +digest: sha256:7643f231cc4ebda347fd12ec62fe4445c280e2b71d27eec555f3025290f5038f +generated: "2025-08-26T10:55:05.888651+02:00" diff --git a/decisions/dist/chart/Chart.yaml b/decisions/dist/chart/Chart.yaml new file mode 100644 index 00000000..caab06fb --- /dev/null +++ b/decisions/dist/chart/Chart.yaml @@ -0,0 +1,14 @@ +apiVersion: v2 +name: cortex-decisions +description: A Helm chart to distribute the cortex decisions operator. +type: application +version: 0.2.0 +appVersion: "latest" +icon: "https://example.com/icon.png" +dependencies: + # Owner info adds a configmap to the kubernetes cluster with information on + # the service owner. This makes it easier to find out who to contact in case + # of issues. 
See: https://github.com/sapcc/helm-charts/pkgs/container/helm-charts%2Fowner-info
+  - name: owner-info
+    repository: oci://ghcr.io/sapcc/helm-charts
+    version: 1.0.0
diff --git a/decisions/dist/chart/charts/owner-info-1.0.0.tgz b/decisions/dist/chart/charts/owner-info-1.0.0.tgz
new file mode 100644
index 00000000..2032ead9
Binary files /dev/null and b/decisions/dist/chart/charts/owner-info-1.0.0.tgz differ
diff --git a/decisions/dist/chart/templates/_helpers.tpl b/decisions/dist/chart/templates/_helpers.tpl
new file mode 100644
index 00000000..05ce24c6
--- /dev/null
+++ b/decisions/dist/chart/templates/_helpers.tpl
@@ -0,0 +1,62 @@
+{{- define "chart.name" -}}
+{{- if .Chart }}
+  {{- if .Chart.Name }}
+    {{- .Chart.Name | trunc 63 | trimSuffix "-" }}
+  {{- else if .Values.nameOverride }}
+    {{ .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+  {{- else }}
+    decisions
+  {{- end }}
+{{- else }}
+  decisions
+{{- end }}
+{{- end }}
+
+
+{{- define "chart.labels" -}}
+{{- if .Chart.AppVersion -}}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+{{- if .Chart.Version }}
+helm.sh/chart: {{ .Chart.Version | quote }}
+{{- end }}
+app.kubernetes.io/name: {{ include "chart.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+
+{{- define "chart.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "chart.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+
+{{- /*
+chart.hasMutatingWebhooks renders "true" when at least one entry of the list
+passed as context has type "mutating", and "false" otherwise.
+*/ -}}
+{{- define "chart.hasMutatingWebhooks" -}}
+{{- $hasMutating := false }}
+{{- range . }}
+  {{- if eq .type "mutating" }}
+    {{- $hasMutating = true }}
+  {{- end }}
+{{- end }}
+{{- $hasMutating -}}
+{{- end }}
+
+
+{{- /*
+chart.hasValidatingWebhooks renders "true" when at least one entry of the list
+passed as context has type "validating", and "false" otherwise.
+*/ -}}
+{{- define "chart.hasValidatingWebhooks" -}}
+{{- $hasValidating := false }}
+{{- range . }}
+  {{- if eq .type "validating" }}
+    {{- $hasValidating = true }}
+  {{- end }}
+{{- end }}
+{{- $hasValidating -}}
+{{- end }}
diff --git a/decisions/dist/chart/templates/certmanager/certificate.yaml b/decisions/dist/chart/templates/certmanager/certificate.yaml
new file mode 100644
index 00000000..b1b42606
--- /dev/null
+++ b/decisions/dist/chart/templates/certmanager/certificate.yaml
@@ -0,0 +1,40 @@
+{{- if .Values.certmanager.enable }}
+# Self-signed Issuer
+apiVersion: cert-manager.io/v1
+kind: Issuer
+metadata:
+  labels:
+    {{- include "chart.labels" . | nindent 4 }}
+  name: selfsigned-issuer
+  namespace: {{ .Release.Namespace }}
+spec:
+  selfSigned: {}
+{{- if .Values.metrics.enable }}
+---
+# Certificate for the metrics
+apiVersion: cert-manager.io/v1
+kind: Certificate
+metadata:
+  annotations:
+    {{- if .Values.crd.keep }}
+    "helm.sh/resource-policy": keep
+    {{- end }}
+  labels:
+    {{- include "chart.labels" . | nindent 4 }}
+  name: metrics-certs
+  namespace: {{ .Release.Namespace }}
+spec:
+  dnsNames:
+    - decisions.{{ .Release.Namespace }}.svc
+    - decisions.{{ .Release.Namespace }}.svc.cluster.local
+    - decisions-metrics-service.{{ .Release.Namespace }}.svc
+    # Name of the metrics Service. The ServiceMonitor uses it as the TLS
+    # serverName, so the certificate has to be valid for it.
+    - decisions-controller-manager-metrics-service.{{ .Release.Namespace }}.svc
+    - decisions-controller-manager-metrics-service.{{ .Release.Namespace }}.svc.cluster.local
+  issuerRef:
+    kind: Issuer
+    name: selfsigned-issuer
+  secretName: metrics-server-cert
+{{- end }}
+{{- end }}
diff --git a/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml b/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml
new file mode 100644
index 00000000..2cea3946
--- /dev/null
+++ b/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml
@@ -0,0 +1,178 @@
+{{- if .Values.crd.enable }}
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  labels:
+    {{- include "chart.labels" .
| nindent 4 }} + annotations: + {{- if .Values.crd.keep }} + "helm.sh/resource-policy": keep + {{- end }} + controller-gen.kubebuilder.io/version: v0.17.2 + name: schedulingdecisions.decisions.cortex +spec: + group: decisions.cortex + names: + kind: SchedulingDecision + listKind: SchedulingDecisionList + plural: schedulingdecisions + shortNames: + - sdec + - sdecs + singular: schedulingdecision + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .status.state + name: State + type: string + - jsonPath: .status.error + name: Error + type: string + - jsonPath: .metadata.creationTimestamp + name: Created + type: date + - jsonPath: .status.decisionCount + name: Decisions + type: integer + - jsonPath: .spec.decisions[-1].eventType + name: Latest Event + type: string + - jsonPath: .status.globalDescription + name: Description + type: string + name: v1alpha1 + schema: + openAPIV3Schema: + description: SchedulingDecision is the Schema for the schedulingdecisions + API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: spec defines the desired state of SchedulingDecision + properties: + decisions: + items: + properties: + availabilityZone: + type: string + eventType: + type: string + flavor: + properties: + name: + type: string + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + required: + - name + type: object + id: + type: string + input: + additionalProperties: + type: number + type: object + pipeline: + properties: + name: + type: string + outputs: + items: + properties: + activations: + additionalProperties: + type: number + type: object + step: + type: string + required: + - step + type: object + type: array + required: + - name + type: object + requestedAt: + format: date-time + type: string + required: + - eventType + - id + - pipeline + - requestedAt + type: object + type: array + required: + - decisions + type: object + status: + description: status defines the observed state of SchedulingDecision + properties: + decisionCount: + type: integer + error: + type: string + globalDescription: + type: string + results: + items: + description: SchedulingDecisionResult represents the result of processing + a single decision request. + properties: + deletedHosts: + additionalProperties: + items: + type: string + type: array + description: Hosts that were deleted during pipeline processing + and all steps that attempted to delete them. + type: object + description: + type: string + finalScores: + additionalProperties: + type: number + description: Final scores for each host after processing all + pipeline steps. 
+                      type: object
+                    id:
+                      type: string
+                  required:
+                  - id
+                  type: object
+                type: array
+              state:
+                type: string
+            type: object
+        required:
+        - spec
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}
+{{- end -}}
diff --git a/decisions/dist/chart/templates/manager/manager.yaml b/decisions/dist/chart/templates/manager/manager.yaml
new file mode 100644
index 00000000..060d2e6f
--- /dev/null
+++ b/decisions/dist/chart/templates/manager/manager.yaml
@@ -0,0 +1,117 @@
+# This file is safe from kubebuilder edit --plugins=helm/v1-alpha
+# If you want to re-generate, add the --force flag.
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: decisions-controller-manager
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "chart.labels" . | nindent 4 }}
+    control-plane: controller-manager
+spec:
+  replicas: {{ .Values.controllerManager.replicas }}
+  selector:
+    matchLabels:
+      {{- include "chart.selectorLabels" . | nindent 6 }}
+      control-plane: controller-manager
+  template:
+    metadata:
+      annotations:
+        kubectl.kubernetes.io/default-container: manager
+      labels:
+        {{- include "chart.labels" . | nindent 8 }}
+        control-plane: controller-manager
+        # Extra pod labels from the values file; quoted so they always render as strings.
+        {{- if and .Values.controllerManager.pod .Values.controllerManager.pod.labels }}
+        {{- range $key, $value := .Values.controllerManager.pod.labels }}
+        {{ $key }}: {{ $value | quote }}
+        {{- end }}
+        {{- end }}
+    spec:
+      containers:
+        - name: manager
+          args:
+            {{- range .Values.controllerManager.container.args }}
+            - {{ . | quote }}
+            {{- end }}
+          command:
+            - /manager
+          image: {{ .Values.controllerManager.container.image.repository }}:{{ .Values.controllerManager.container.image.tag | default .Chart.AppVersion }}
+          {{- if .Values.controllerManager.container.env }}
+          # Env values are quoted because the value field only accepts strings.
+          env:
+            {{- range $key, $value := .Values.controllerManager.container.env }}
+            - name: {{ $key }}
+              value: {{ $value | quote }}
+            {{- end }}
+          {{- end }}
+          livenessProbe:
+            {{- toYaml .Values.controllerManager.container.livenessProbe | nindent 12 }}
+          readinessProbe:
+            {{- toYaml .Values.controllerManager.container.readinessProbe | nindent 12 }}
+          resources:
+            {{- toYaml .Values.controllerManager.container.resources | nindent 12 }}
+          securityContext:
+            {{- toYaml .Values.controllerManager.container.securityContext | nindent 12 }}
+          volumeMounts:
+            - name: decisions-controller-manager-config-volume
+              mountPath: /etc/config
+            - name: decisions-controller-manager-secrets-volume
+              mountPath: /etc/secrets
+              readOnly: true
+            {{- if and .Values.metrics.enable .Values.certmanager.enable }}
+            - name: metrics-certs
+              mountPath: /tmp/k8s-metrics-server/metrics-certs
+              readOnly: true
+            {{- end }}
+      securityContext:
+        {{- toYaml .Values.controllerManager.securityContext | nindent 8 }}
+      serviceAccountName: {{ .Values.controllerManager.serviceAccountName }}
+      terminationGracePeriodSeconds: {{ .Values.controllerManager.terminationGracePeriodSeconds }}
+      volumes:
+        # Custom values to configure the controller-manager.
+        - name: decisions-controller-manager-config-volume
+          configMap:
+            name: decisions-controller-manager-config
+        - name: decisions-controller-manager-secrets-volume
+          secret:
+            secretName: decisions-controller-manager-secrets
+        {{- if and .Values.metrics.enable .Values.certmanager.enable }}
+        - name: metrics-certs
+          secret:
+            secretName: metrics-server-cert
+        {{- end }}
+---
+# Rendered from .Values.decisions.conf; mounted at /etc/config in the manager container.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: decisions-controller-manager-config
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "chart.labels" . | nindent 4 }}
+data:
+  conf.json: |-
+    {{- if .Values.decisions.conf }}
+    {{ toJson .Values.decisions.conf }}
+    {{- else }}
+    {}
+    {{- end }}
+---
+# Rendered from .Values.decisions.secrets; mounted read-only at /etc/secrets in the manager container.
+apiVersion: v1
+kind: Secret
+metadata:
+  name: decisions-controller-manager-secrets
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "chart.labels" . | nindent 4 }}
+type: Opaque
+data:
+  secrets.json: |-
+    {{- if .Values.decisions.secrets }}
+    {{ toJson .Values.decisions.secrets | b64enc }}
+    {{- else }}
+    {{ "{}" | b64enc }}
+    {{- end }}
diff --git a/decisions/dist/chart/templates/metrics/metrics-service.yaml b/decisions/dist/chart/templates/metrics/metrics-service.yaml
new file mode 100644
index 00000000..818e728d
--- /dev/null
+++ b/decisions/dist/chart/templates/metrics/metrics-service.yaml
@@ -0,0 +1,18 @@
+{{- if .Values.metrics.enable }}
+apiVersion: v1
+kind: Service
+metadata:
+  name: decisions-controller-manager-metrics-service
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "chart.labels" .
| nindent 4 }}
+    control-plane: controller-manager
+spec:
+  ports:
+    - port: 8443
+      targetPort: 8443
+      protocol: TCP
+      name: https
+  selector:
+    control-plane: controller-manager
+{{- end }}
diff --git a/decisions/dist/chart/templates/network-policy/allow-metrics-traffic.yaml b/decisions/dist/chart/templates/network-policy/allow-metrics-traffic.yaml
new file mode 100644
index 00000000..9d54a550
--- /dev/null
+++ b/decisions/dist/chart/templates/network-policy/allow-metrics-traffic.yaml
@@ -0,0 +1,30 @@
+{{- if .Values.networkPolicy.enable }}
+# This NetworkPolicy allows ingress traffic to the controller-manager pods
+# from Pods running in namespaces labeled with 'metrics: enabled'. Only Pods in those
+# namespaces are able to gather data from the metrics endpoint.
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  labels:
+    {{- include "chart.labels" . | nindent 4 }}
+  name: allow-metrics-traffic
+  namespace: {{ .Release.Namespace }}
+spec:
+  podSelector:
+    matchLabels:
+      control-plane: controller-manager
+      # Rendered from the same helper that chart.labels uses for the pods'
+      # app.kubernetes.io/name label, so the selector matches them.
+      app.kubernetes.io/name: {{ include "chart.name" . }}
+  policyTypes:
+    - Ingress
+  ingress:
+    # This allows ingress traffic from any namespace with the label metrics: enabled
+    - from:
+        - namespaceSelector:
+            matchLabels:
+              metrics: enabled  # Only from namespaces with this label
+      ports:
+        - port: 8443
+          protocol: TCP
+{{- end -}}
diff --git a/decisions/dist/chart/templates/prometheus/monitor.yaml b/decisions/dist/chart/templates/prometheus/monitor.yaml
new file mode 100644
index 00000000..1720ee91
--- /dev/null
+++ b/decisions/dist/chart/templates/prometheus/monitor.yaml
@@ -0,0 +1,40 @@
+# To integrate with Prometheus.
+{{- if .Values.prometheus.enable }}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  labels:
+    {{- include "chart.labels" .
| nindent 4 }} + control-plane: controller-manager + name: decisions-controller-manager-metrics-monitor + namespace: {{ .Release.Namespace }} +spec: + endpoints: + - path: /metrics + port: https + scheme: https + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + tlsConfig: + {{- if .Values.certmanager.enable }} + serverName: decisions-controller-manager-metrics-service.{{ .Release.Namespace }}.svc + # Apply secure TLS configuration with cert-manager + insecureSkipVerify: false + ca: + secret: + name: metrics-server-cert + key: ca.crt + cert: + secret: + name: metrics-server-cert + key: tls.crt + keySecret: + name: metrics-server-cert + key: tls.key + {{- else }} + # Development/Test mode (insecure configuration) + insecureSkipVerify: true + {{- end }} + selector: + matchLabels: + control-plane: controller-manager +{{- end }} diff --git a/decisions/dist/chart/templates/rbac/leader_election_role.yaml b/decisions/dist/chart/templates/rbac/leader_election_role.yaml new file mode 100644 index 00000000..5e5e2ded --- /dev/null +++ b/decisions/dist/chart/templates/rbac/leader_election_role.yaml @@ -0,0 +1,42 @@ +{{- if .Values.rbac.enable }} +# permissions to do leader election. +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + labels: + {{- include "chart.labels" . 
| nindent 4 }} + namespace: {{ .Release.Namespace }} + name: decisions-leader-election-role +rules: +- apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch +{{- end -}} diff --git a/decisions/dist/chart/templates/rbac/leader_election_role_binding.yaml b/decisions/dist/chart/templates/rbac/leader_election_role_binding.yaml new file mode 100644 index 00000000..a4be63be --- /dev/null +++ b/decisions/dist/chart/templates/rbac/leader_election_role_binding.yaml @@ -0,0 +1,17 @@ +{{- if .Values.rbac.enable }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + {{- include "chart.labels" . | nindent 4 }} + namespace: {{ .Release.Namespace }} + name: decisions-leader-election-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: decisions-leader-election-role +subjects: +- kind: ServiceAccount + name: {{ .Values.controllerManager.serviceAccountName }} + namespace: {{ .Release.Namespace }} +{{- end -}} diff --git a/decisions/dist/chart/templates/rbac/metrics_auth_role.yaml b/decisions/dist/chart/templates/rbac/metrics_auth_role.yaml new file mode 100644 index 00000000..8ed40055 --- /dev/null +++ b/decisions/dist/chart/templates/rbac/metrics_auth_role.yaml @@ -0,0 +1,21 @@ +{{- if and .Values.rbac.enable .Values.metrics.enable }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + {{- include "chart.labels" . 
| nindent 4 }} + name: decisions-metrics-auth-role +rules: +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create +{{- end -}} diff --git a/decisions/dist/chart/templates/rbac/metrics_auth_role_binding.yaml b/decisions/dist/chart/templates/rbac/metrics_auth_role_binding.yaml new file mode 100644 index 00000000..d3ca3c7e --- /dev/null +++ b/decisions/dist/chart/templates/rbac/metrics_auth_role_binding.yaml @@ -0,0 +1,16 @@ +{{- if and .Values.rbac.enable .Values.metrics.enable }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + {{- include "chart.labels" . | nindent 4 }} + name: decisions-metrics-auth-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: decisions-metrics-auth-role +subjects: +- kind: ServiceAccount + name: {{ .Values.controllerManager.serviceAccountName }} + namespace: {{ .Release.Namespace }} +{{- end -}} diff --git a/decisions/dist/chart/templates/rbac/metrics_reader_role.yaml b/decisions/dist/chart/templates/rbac/metrics_reader_role.yaml new file mode 100644 index 00000000..81f7da70 --- /dev/null +++ b/decisions/dist/chart/templates/rbac/metrics_reader_role.yaml @@ -0,0 +1,13 @@ +{{- if and .Values.rbac.enable .Values.metrics.enable }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + {{- include "chart.labels" . 
| nindent 4 }} + name: decisions-metrics-reader +rules: +- nonResourceURLs: + - "/metrics" + verbs: + - get +{{- end -}} diff --git a/decisions/dist/chart/templates/rbac/role.yaml b/decisions/dist/chart/templates/rbac/role.yaml new file mode 100644 index 00000000..b93e56fc --- /dev/null +++ b/decisions/dist/chart/templates/rbac/role.yaml @@ -0,0 +1,36 @@ +{{- if .Values.rbac.enable }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + {{- include "chart.labels" . | nindent 4 }} + name: decisions-manager-role +rules: +- apiGroups: + - decisions.cortex + resources: + - schedulingdecisions + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - decisions.cortex + resources: + - schedulingdecisions/finalizers + verbs: + - update +- apiGroups: + - decisions.cortex + resources: + - schedulingdecisions/status + verbs: + - get + - patch + - update +{{- end -}} diff --git a/decisions/dist/chart/templates/rbac/role_binding.yaml b/decisions/dist/chart/templates/rbac/role_binding.yaml new file mode 100644 index 00000000..09804a2a --- /dev/null +++ b/decisions/dist/chart/templates/rbac/role_binding.yaml @@ -0,0 +1,16 @@ +{{- if .Values.rbac.enable }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + {{- include "chart.labels" . 
| nindent 4 }} + name: decisions-manager-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: decisions-manager-role +subjects: +- kind: ServiceAccount + name: {{ .Values.controllerManager.serviceAccountName }} + namespace: {{ .Release.Namespace }} +{{- end -}} diff --git a/decisions/dist/chart/templates/rbac/schedulingdecision_admin_role.yaml b/decisions/dist/chart/templates/rbac/schedulingdecision_admin_role.yaml new file mode 100644 index 00000000..6db64811 --- /dev/null +++ b/decisions/dist/chart/templates/rbac/schedulingdecision_admin_role.yaml @@ -0,0 +1,28 @@ +{{- if .Values.rbac.enable }} +# This rule is not used by the project decisions itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants full permissions ('*') over decisions.cortex. +# This role is intended for users authorized to modify roles and bindings within the cluster, +# enabling them to delegate specific permissions to other users or groups as needed. + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + {{- include "chart.labels" . | nindent 4 }} + name: schedulingdecision-admin-role +rules: +- apiGroups: + - decisions.cortex + resources: + - schedulingdecisions + verbs: + - '*' +- apiGroups: + - decisions.cortex + resources: + - schedulingdecisions/status + verbs: + - get +{{- end -}} diff --git a/decisions/dist/chart/templates/rbac/schedulingdecision_editor_role.yaml b/decisions/dist/chart/templates/rbac/schedulingdecision_editor_role.yaml new file mode 100644 index 00000000..7a82611c --- /dev/null +++ b/decisions/dist/chart/templates/rbac/schedulingdecision_editor_role.yaml @@ -0,0 +1,34 @@ +{{- if .Values.rbac.enable }} +# This rule is not used by the project decisions itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants permissions to create, update, and delete resources within the decisions.cortex. 
+# This role is intended for users who need to manage these resources +# but should not control RBAC or manage permissions for others. + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + {{- include "chart.labels" . | nindent 4 }} + name: schedulingdecision-editor-role +rules: +- apiGroups: + - decisions.cortex + resources: + - schedulingdecisions + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - decisions.cortex + resources: + - schedulingdecisions/status + verbs: + - get +{{- end -}} diff --git a/decisions/dist/chart/templates/rbac/schedulingdecision_viewer_role.yaml b/decisions/dist/chart/templates/rbac/schedulingdecision_viewer_role.yaml new file mode 100644 index 00000000..4375bd65 --- /dev/null +++ b/decisions/dist/chart/templates/rbac/schedulingdecision_viewer_role.yaml @@ -0,0 +1,30 @@ +{{- if .Values.rbac.enable }} +# This rule is not used by the project decisions itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants read-only access to decisions.cortex resources. +# This role is intended for users who need visibility into these resources +# without permissions to modify them. It is ideal for monitoring purposes and limited-access viewing. + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + {{- include "chart.labels" . 
| nindent 4 }}
+  name: schedulingdecision-viewer-role
+rules:
+- apiGroups:
+  - decisions.cortex
+  resources:
+  - schedulingdecisions
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - decisions.cortex
+  resources:
+  - schedulingdecisions/status
+  verbs:
+  - get
+{{- end -}}
diff --git a/decisions/dist/chart/templates/rbac/service_account.yaml b/decisions/dist/chart/templates/rbac/service_account.yaml
new file mode 100644
index 00000000..93e0a323
--- /dev/null
+++ b/decisions/dist/chart/templates/rbac/service_account.yaml
@@ -0,0 +1,16 @@
+{{- if .Values.rbac.enable }}
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  labels:
+    {{- include "chart.labels" . | nindent 4 }}
+  # Optional annotations from the values file; quoted so they always render as strings.
+  {{- if and .Values.controllerManager.serviceAccount .Values.controllerManager.serviceAccount.annotations }}
+  annotations:
+    {{- range $key, $value := .Values.controllerManager.serviceAccount.annotations }}
+    {{ $key }}: {{ $value | quote }}
+    {{- end }}
+  {{- end }}
+  name: {{ .Values.controllerManager.serviceAccountName }}
+  namespace: {{ .Release.Namespace }}
+{{- end -}}
diff --git a/decisions/dist/chart/values.yaml b/decisions/dist/chart/values.yaml
new file mode 100644
index 00000000..8ff57f39
--- /dev/null
+++ b/decisions/dist/chart/values.yaml
@@ -0,0 +1,131 @@
+# This file is safe from kubebuilder edit --plugins=helm/v1-alpha
+# If you want to re-generate, add the --force flag.
+ +owner-info: + enabled: true + helm-chart-url: "https://github.com/cobaltcore-dev/cortex/decisions/dist/chart" + maintainers: + - "p.matthes@sap.com" + - "markus.wieland@sap.com" + - "arno.uhlig@sap.com" + support-group: "workload-management" + service: "cortex-decisions" + +# [MANAGER]: Manager Deployment Configurations +controllerManager: + replicas: 1 + container: + image: + repository: ghcr.io/cobaltcore-dev/cortex-decisions-operator + args: + - "--leader-elect" + - "--metrics-bind-address=:8443" + - "--health-probe-bind-address=:8081" + resources: + limits: + cpu: 500m + memory: 512Mi + requests: + cpu: 10m + memory: 64Mi + livenessProbe: + initialDelaySeconds: 15 + periodSeconds: 20 + httpGet: + path: /healthz + port: 8081 + readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 10 + httpGet: + path: /readyz + port: 8081 + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "ALL" + securityContext: + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + terminationGracePeriodSeconds: 10 + serviceAccountName: decisions-controller-manager + +# [RBAC]: To enable RBAC (Permissions) configurations +rbac: + enable: true + +# [CRDs]: To enable the CRDs +crd: + # This option determines whether the CRDs are included + # in the installation process. + enable: true + + # Enabling this option adds the "helm.sh/resource-policy": keep + # annotation to the CRD, ensuring it remains installed even when + # the Helm release is uninstalled. + # NOTE: Removing the CRDs will also remove all cert-manager CR(s) + # (Certificates, Issuers, ...) due to garbage collection. + keep: true + +# [METRICS]: Set to true to generate manifests for exporting metrics. +# To disable metrics export set false, and ensure that the +# ControllerManager argument "--metrics-bind-address=:8443" is removed. 
+metrics:
+  enable: true
+
+# [PROMETHEUS]: To enable a ServiceMonitor to export metrics to Prometheus set true
+prometheus:
+  enable: true
+
+# [CERT-MANAGER]: To enable cert-manager injection to webhooks set true
+certmanager:
+  enable: false
+
+# [NETWORK POLICIES]: To enable NetworkPolicies set true
+networkPolicy:
+  enable: false
+
+# SSO certificate to use.
+sharedSSOCert: &sharedSSOCert
+  # Certificate "public key". (Optional, remove this key if not needed)
+  cert: |
+    -----BEGIN CERTIFICATE-----
+    Your certificate here
+    -----END CERTIFICATE-----
+  # Certificate private key. (Optional, remove this key if not needed)
+  certKey: |
+    -----BEGIN PRIVATE KEY-----
+    Your private key here
+    -----END PRIVATE KEY-----
+  # Whether the certificate is self-signed.
+  # If true, the certificate is not verified.
+  selfSigned: false
+
+decisions:
+  # Default configuration provided through configmap to the operator.
+  conf:
+    # Which hypervisor types should be handled by the operator.
+    hypervisors:
+      - "QEMU"
+      - "CH"
+      # Not supported:
+      # - "VMware vCenter Server"
+      # - "ironic"
+    endpoints:
+      # The URL of the Nova external scheduler service.
+      novaExternalScheduler: "http://cortex-nova-scheduler:8080/scheduler/nova/external"
+    # TTL for scheduling decisions after the last decision's RequestedAt timestamp (in seconds)
+    ttlAfterDecisionSeconds: 86400
+  # Config provided here will override the config provided above.
+  secrets:
+    # Override the endpoints and credentials to your OpenStack.
+ keystone: + url: https://path-to-keystone/v3 + sso: *sharedSSOCert + username: openstack-user-with-all-project-read-access + password: openstack-user-password + projectName: openstack-project-of-user + userDomainName: openstack-domain-of-user + projectDomainName: openstack-domain-of-project-scoped-to diff --git a/decisions/go.mod b/decisions/go.mod new file mode 100644 index 00000000..7d25ec49 --- /dev/null +++ b/decisions/go.mod @@ -0,0 +1,105 @@ +module github.com/cobaltcore-dev/cortex/decisions + +go 1.25.0 + +replace ( + github.com/cobaltcore-dev/cortex => ../ + github.com/cobaltcore-dev/cortex/decisions/api => ./api +) + +require ( + github.com/cobaltcore-dev/cortex v0.0.0-00010101000000-000000000000 + github.com/cobaltcore-dev/cortex/decisions/api v0.0.0-00010101000000-000000000000 + k8s.io/apimachinery v0.34.1 + k8s.io/client-go v0.34.1 + sigs.k8s.io/controller-runtime v0.22.1 +) + +require ( + github.com/pmezard/go-difflib v1.0.0 // indirect + go.yaml.in/yaml/v2 v2.4.2 // indirect + go.yaml.in/yaml/v3 v3.0.4 // indirect + sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect +) + +require ( + cel.dev/expr v0.24.0 // indirect + github.com/antlr4-go/antlr/v4 v4.13.0 // indirect + github.com/beorn7/perks v1.0.1 // indirect + github.com/blang/semver/v4 v4.0.0 // indirect + github.com/cenkalti/backoff/v4 v4.3.0 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/emicklei/go-restful/v3 v3.12.2 // indirect + github.com/evanphx/json-patch/v5 v5.9.11 // indirect + github.com/felixge/httpsnoop v1.0.4 // indirect + github.com/fsnotify/fsnotify v1.9.0 // indirect + github.com/fxamacker/cbor/v2 v2.9.0 // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/go-logr/zapr v1.3.0 // indirect + github.com/go-openapi/jsonpointer v0.21.0 // indirect + github.com/go-openapi/jsonreference v0.20.2 // indirect + github.com/go-openapi/swag v0.23.0 // 
indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/google/btree v1.1.3 // indirect + github.com/google/cel-go v0.26.0 // indirect + github.com/google/gnostic-models v0.7.0 // indirect + github.com/google/go-cmp v0.7.0 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/mailru/easyjson v0.7.7 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/pkg/errors v0.9.1 // indirect + github.com/prometheus/client_golang v1.23.2 // indirect + github.com/prometheus/client_model v0.6.2 // indirect + github.com/prometheus/common v0.66.1 // indirect + github.com/prometheus/procfs v0.17.0 // indirect + github.com/spf13/cobra v1.9.1 // indirect + github.com/spf13/pflag v1.0.6 // indirect + github.com/stoewer/go-strcase v1.3.0 // indirect + github.com/x448/float16 v0.8.4 // indirect + go.opentelemetry.io/auto/sdk v1.1.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 // indirect + go.opentelemetry.io/otel v1.37.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.34.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.34.0 // indirect + go.opentelemetry.io/otel/metric v1.37.0 // indirect + go.opentelemetry.io/otel/sdk v1.35.0 // indirect + go.opentelemetry.io/otel/trace v1.37.0 // indirect + go.opentelemetry.io/proto/otlp v1.5.0 // indirect + go.uber.org/multierr v1.11.0 // indirect + go.uber.org/zap v1.27.0 // indirect + golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect + golang.org/x/net v0.44.0 // indirect + golang.org/x/oauth2 
v0.30.0 // indirect + golang.org/x/sync v0.17.0 // indirect + golang.org/x/sys v0.36.0 // indirect + golang.org/x/term v0.35.0 // indirect + golang.org/x/text v0.29.0 // indirect + golang.org/x/time v0.12.0 // indirect + gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20250303144028-a0af3efb3deb // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb // indirect + google.golang.org/grpc v1.72.1 // indirect + google.golang.org/protobuf v1.36.8 // indirect + gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect + k8s.io/api v0.34.1 // indirect + k8s.io/apiextensions-apiserver v0.34.0 // indirect + k8s.io/apiserver v0.34.0 // indirect + k8s.io/component-base v0.34.0 // indirect + k8s.io/klog/v2 v2.130.1 // indirect + k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b // indirect + k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 // indirect + sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 // indirect + sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect + sigs.k8s.io/randfill v1.0.0 // indirect + sigs.k8s.io/yaml v1.6.0 // indirect +) diff --git a/decisions/go.sum b/decisions/go.sum new file mode 100644 index 00000000..bc272d7d --- /dev/null +++ b/decisions/go.sum @@ -0,0 +1,259 @@ +cel.dev/expr v0.24.0 h1:56OvJKSH3hDGL0ml5uSxZmz3/3Pq4tJ+fb1unVLAFcY= +cel.dev/expr v0.24.0/go.mod h1:hLPLo1W4QUmuYdA72RBX06QTs6MXw941piREPl3Yfiw= +github.com/antlr4-go/antlr/v4 v4.13.0 h1:lxCg3LAv+EUK6t1i0y1V6/SLeUi0eKEKdhQAlS8TVTI= +github.com/antlr4-go/antlr/v4 v4.13.0/go.mod h1:pfChB/xh/Unjila75QW7+VU4TSnWnnk9UTnmpPaOR2g= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= +github.com/blang/semver/v4 
v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= +github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= +github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/emicklei/go-restful/v3 v3.12.2 h1:DhwDP0vY3k8ZzE0RunuJy8GhNpPL6zqLkDf9B/a0/xU= +github.com/emicklei/go-restful/v3 v3.12.2/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/evanphx/json-patch v0.5.2 h1:xVCHIVMUu1wtM/VkR9jVZ45N3FhZfYMMYGorLCR8P3k= +github.com/evanphx/json-patch v0.5.2/go.mod h1:ZWS5hhDbVDyob71nXKNL0+PWn6ToqBHMikGIFbs31qQ= +github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU= +github.com/evanphx/json-patch/v5 v5.9.11/go.mod h1:3j+LviiESTElxA4p3EMKAB9HXj3/XEtnUf6OZxqIQTM= +github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= +github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= +github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= +github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM= +github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= +github.com/go-logr/logr v1.2.2/go.mod 
h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= +github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= +github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= +github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= +github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= +github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= +github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= +github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= +github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg= +github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= 
+github.com/google/cel-go v0.26.0 h1:DPGjXackMpJWH680oGY4lZhYjIameYmR+/6RBdDGmaI= +github.com/google/cel-go v0.26.0/go.mod h1:A9O8OU9rdvrK5MQyrqfIxo1a0u4g3sF8KB6PUIaryMM= +github.com/google/gnostic-models v0.7.0 h1:qwTtogB15McXDaNqTZdzPJRHvaVJlAl+HVQnLmJEJxo= +github.com/google/gnostic-models v0.7.0/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgYQBbFN4U4JNXUNYpxael3UzMyo= +github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 h1:5ZPtiqj0JL5oKWmcsq4VMaAW5ukBEgSGXEN89zeH1Jo= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3/go.mod h1:ndYquD05frm2vACXE1nsccT4oJzjhw2arTS2cpUD1PI= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/kisielk/errcheck v1.5.0/go.mod 
h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= +github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 
h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/onsi/ginkgo/v2 v2.22.0 h1:Yed107/8DjTr0lKCNt7Dn8yQ6ybuDRQoMGrNFKzMfHg= +github.com/onsi/ginkgo/v2 v2.22.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo= +github.com/onsi/gomega v1.36.1 h1:bJDPBO7ibjxcbHMgSCoo4Yj18UWbKDlLwX1x9sybDcw= +github.com/onsi/gomega v1.36.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= +github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= +github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= +github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= +github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs= +github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA= +github.com/prometheus/procfs v0.17.0 h1:FuLQ+05u4ZI+SS/w9+BWEM2TXiHKsUQ9TADiRH7DuK0= +github.com/prometheus/procfs v0.17.0/go.mod h1:oPQLaDAMRbA+u8H5Pbfq+dl3VDAvHxMUOVhe0wYB2zw= +github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= +github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= +github.com/spf13/cobra v1.9.1/go.mod 
h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= +github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= +github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stoewer/go-strcase v1.3.0 h1:g0eASXYtp+yvN9fK8sH94oCIk0fau9uV1/ZdJ0AVEzs= +github.com/stoewer/go-strcase v1.3.0/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8wodgtPmh1xo= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= +go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 
h1:sbiXRNDSWJOTobXh5HyQKjq6wUC5tNybqjIqDpAY4CU= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0/go.mod h1:69uWxva0WgAA/4bu2Yy70SLDBwZXuQ6PbBpbsa5iZrQ= +go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ= +go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.34.0 h1:OeNbIYk/2C15ckl7glBlOBp5+WlYsOElzTNmiPW/x60= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.34.0/go.mod h1:7Bept48yIeqxP2OZ9/AqIpYS94h2or0aB4FypJTc8ZM= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.34.0 h1:tgJ0uaNS4c98WRNUEx5U3aDlrDOI5Rs+1Vifcw4DJ8U= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.34.0/go.mod h1:U7HYyW0zt/a9x5J1Kjs+r1f/d4ZHnYFclhYY2+YbeoE= +go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE= +go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E= +go.opentelemetry.io/otel/sdk v1.35.0 h1:iPctf8iprVySXSKJffSS79eOjl9pvxV9ZqOWT0QejKY= +go.opentelemetry.io/otel/sdk v1.35.0/go.mod h1:+ga1bZliga3DxJ3CQGg3updiaAJoNECOgJREo9KHGQg= +go.opentelemetry.io/otel/sdk/metric v1.35.0 h1:1RriWBmCKgkeHEhM7a2uMjMUfP7MsOF5JpUCaEqEI9o= +go.opentelemetry.io/otel/sdk/metric v1.35.0/go.mod h1:is6XYCUMpcKi+ZsOvfluY5YstFnhW0BidkR+gL+qN+w= +go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4= +go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0= +go.opentelemetry.io/proto/otlp v1.5.0 h1:xJvq7gMzB31/d406fB8U5CBdyQGw4P399D1aQWU/3i4= +go.opentelemetry.io/proto/otlp v1.5.0/go.mod h1:keN8WnHxOy8PG0rQZjJJ5A2ebUoafqWp0eVQ4yIXvJ4= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr 
v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= +go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= +go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI= +go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU= +go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= +go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8= +golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.44.0 h1:evd8IRDyfNBMBTTY5XRF1vaZlD+EmWx6x8PkhR04H/I= +golang.org/x/net v0.44.0/go.mod h1:ECOoLqd5U3Lhyeyo/QDCEVQ4sNgYsqvCZ722XogGieY= +golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= +golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= +golang.org/x/sync 
v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= +golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k= +golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term v0.35.0 h1:bZBVKBudEyhRcajGcNc3jIfWPqV4y/Kt2XcoigOWtDQ= +golang.org/x/term v0.35.0/go.mod h1:TPGtkTLesOwf2DE8CgVYiZinHAOuy5AYUYT1lENIZnA= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk= +golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4= +golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= +golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod 
h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg= +golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= +gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= +google.golang.org/genproto/googleapis/api v0.0.0-20250303144028-a0af3efb3deb h1:p31xT4yrYrSM/G4Sn2+TNUkVhFCbG9y8itM2S6Th950= +google.golang.org/genproto/googleapis/api v0.0.0-20250303144028-a0af3efb3deb/go.mod h1:jbe3Bkdp+Dh2IrslsFCklNhweNTBgSYanP1UXhJDhKg= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb h1:TLPQVbx1GJ8VKZxz52VAxl1EBgKXXbTiU9Fc5fZeLn4= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb/go.mod h1:LuRYeWDFV6WOn90g357N17oMCaxpgCnbi/44qJvDn2I= +google.golang.org/grpc v1.72.1 h1:HR03wO6eyZ7lknl75XlxABNVLLFc2PAb6mHlYh756mA= +google.golang.org/grpc v1.72.1/go.mod h1:wH5Aktxcg25y1I3w7H69nHfXdOG3UiadoBtjh3izSDM= +google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc= +google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod 
h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4= +gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +k8s.io/api v0.34.1 h1:jC+153630BMdlFukegoEL8E/yT7aLyQkIVuwhmwDgJM= +k8s.io/api v0.34.1/go.mod h1:SB80FxFtXn5/gwzCoN6QCtPD7Vbu5w2n1S0J5gFfTYk= +k8s.io/apiextensions-apiserver v0.34.0 h1:B3hiB32jV7BcyKcMU5fDaDxk882YrJ1KU+ZSkA9Qxoc= +k8s.io/apiextensions-apiserver v0.34.0/go.mod h1:hLI4GxE1BDBy9adJKxUxCEHBGZtGfIg98Q+JmTD7+g0= +k8s.io/apimachinery v0.34.1 h1:dTlxFls/eikpJxmAC7MVE8oOeP1zryV7iRyIjB0gky4= +k8s.io/apimachinery v0.34.1/go.mod h1:/GwIlEcWuTX9zKIg2mbw0LRFIsXwrfoVxn+ef0X13lw= +k8s.io/apiserver v0.34.0 h1:Z51fw1iGMqN7uJ1kEaynf2Aec1Y774PqU+FVWCFV3Jg= +k8s.io/apiserver v0.34.0/go.mod h1:52ti5YhxAvewmmpVRqlASvaqxt0gKJxvCeW7ZrwgazQ= +k8s.io/client-go v0.34.1 h1:ZUPJKgXsnKwVwmKKdPfw4tB58+7/Ik3CrjOEhsiZ7mY= +k8s.io/client-go v0.34.1/go.mod h1:kA8v0FP+tk6sZA0yKLRG67LWjqufAoSHA2xVGKw9Of8= +k8s.io/component-base v0.34.0 h1:bS8Ua3zlJzapklsB1dZgjEJuJEeHjj8yTu1gxE2zQX8= +k8s.io/component-base v0.34.0/go.mod h1:RSCqUdvIjjrEm81epPcjQ/DS+49fADvGSCkIP3IC6vg= +k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= +k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= +k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b h1:MloQ9/bdJyIu9lb1PzujOPolHyvO06MXG5TUIj2mNAA= +k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b/go.mod h1:UZ2yyWbFTpuhSbFhv24aGNOdoRdJZgsIObGBUaYVsts= +k8s.io/utils 
v0.0.0-20250604170112-4c0f3b243397 h1:hwvWFiBzdWw1FhfY1FooPn3kzWuJ8tmbZBHi4zVsl1Y= +k8s.io/utils v0.0.0-20250604170112-4c0f3b243397/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 h1:jpcvIRr3GLoUoEKRkHKSmGjxb6lWwrBlJsXc+eUYQHM= +sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw= +sigs.k8s.io/controller-runtime v0.22.1 h1:Ah1T7I+0A7ize291nJZdS1CabF/lB4E++WizgV24Eqg= +sigs.k8s.io/controller-runtime v0.22.1/go.mod h1:FwiwRjkRPbiN+zp2QRp7wlTCzbUXxZ/D4OzuQUDwBHY= +sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= +sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= +sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= +sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= +sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco= +sigs.k8s.io/structured-merge-diff/v6 v6.3.0/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE= +sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs= +sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4= diff --git a/decisions/hack/boilerplate.go.txt b/decisions/hack/boilerplate.go.txt new file mode 100644 index 00000000..0fb88f91 --- /dev/null +++ b/decisions/hack/boilerplate.go.txt @@ -0,0 +1,2 @@ +// Copyright 2025 SAP SE +// SPDX-License-Identifier: Apache-2.0 \ No newline at end of file diff --git a/decisions/internal/controller/conf.go b/decisions/internal/controller/conf.go new file mode 100644 index 00000000..5a329b25 --- /dev/null +++ b/decisions/internal/controller/conf.go @@ -0,0 +1,14 @@ +// Copyright 2025 SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package controller + +const ( + DefaultTTLAfterDecisionSeconds = 24 * 60 * 60 // 24 hours in seconds +) + 
+// Configuration for the decisions operator. +type Config struct { + // TTL for scheduling decisions after the last decision's RequestedAt timestamp (in seconds) + TTLAfterDecisionSeconds int `json:"ttlAfterDecisionSeconds,omitempty"` +} diff --git a/decisions/internal/controller/controller.go b/decisions/internal/controller/controller.go new file mode 100644 index 00000000..7456bcce --- /dev/null +++ b/decisions/internal/controller/controller.go @@ -0,0 +1,659 @@ +// Copyright 2025 SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package controller + +import ( + "context" + "fmt" + "math" + "sort" + "strings" + "time" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/event" + logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/predicate" + + "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1" + decisionsv1alpha1 "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1" +) + +const ( + MinScoreValue = -999999 + + selectedPerfectFmt = "Selected: %s (score: %.2f), certainty: perfect, %d hosts evaluated." + selectedCertaintyFmt = "Selected: %s (score: %.2f), certainty: %s (gap: %.2f), %d hosts evaluated." + noHostsRemainingFmt = "No hosts remaining after filtering, %d hosts evaluated" + inputConfirmedFmt = " Input choice confirmed: %s (%.2f→%.2f, remained #1)." + inputFilteredFmt = " Input favored %s (score: %.2f, now filtered), final winner was #%d in input (%.2f→%.2f)." + inputDemotedFmt = " Input favored %s (score: %.2f, now #%d with %.2f), final winner was #%d in input (%.2f→%.2f)." 
+) + +type certaintyLevel struct { + threshold float64 + level string +} + +var certaintyLevels = []certaintyLevel{ + {0.5, "high"}, + {0.2, "medium"}, + {0.0, "low"}, +} + +func getCertaintyLevel(gap float64) string { + for _, cl := range certaintyLevels { + if gap >= cl.threshold { + return cl.level + } + } + return "low" +} + +type noDeleteEventsPredicate struct{} + +func (noDeleteEventsPredicate) Create(e event.CreateEvent) bool { + return true +} + +func (noDeleteEventsPredicate) Update(e event.UpdateEvent) bool { + return true +} + +func (noDeleteEventsPredicate) Delete(e event.DeleteEvent) bool { + // Ignore delete events to prevent race conditions with TTL controller + return false +} + +func (noDeleteEventsPredicate) Generic(e event.GenericEvent) bool { + return true +} + +type hostScore struct { + host string + score float64 +} + +// mapToSortedHostScores sorts hosts by score descending +func mapToSortedHostScores(scores map[string]float64) []hostScore { + sorted := make([]hostScore, 0, len(scores)) + for host, score := range scores { + sorted = append(sorted, hostScore{host: host, score: score}) + } + sort.Slice(sorted, func(i, j int) bool { + return sorted[i].score > sorted[j].score + }) + return sorted +} + +func findHostPosition(hosts []hostScore, targetHost string) int { + for i, hs := range hosts { + if hs.host == targetHost { + return i + 1 + } + } + return -1 +} + +// SchedulingDecisionReconciler reconciles a SchedulingDecision object +type SchedulingDecisionReconciler struct { + // Client for the kubernetes API. + client.Client + // Kubernetes scheme to use for the decisions. + Scheme *runtime.Scheme + // Configuration for the controller. 
+ Conf Config +} + +// +kubebuilder:rbac:groups=decisions.cortex,resources=schedulingdecisions,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=decisions.cortex,resources=schedulingdecisions/status,verbs=get;update;patch +// +kubebuilder:rbac:groups=decisions.cortex,resources=schedulingdecisions/finalizers,verbs=update + +func (r *SchedulingDecisionReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + _ = logf.FromContext(ctx) + // Fetch the decision object. + var res v1alpha1.SchedulingDecision + if err := r.Get(ctx, req.NamespacedName, &res); err != nil { + // Resource was deleted or doesn't exist - nothing to process + // This can happen when the TTL controller deletes a resource while + // a reconcile request is still queued for the main controller + return ctrl.Result{}, client.IgnoreNotFound(err) + } + + // Validate we have at least one decision + if len(res.Spec.Decisions) == 0 { + if err := r.setErrorState(ctx, &res, fmt.Errorf("No decisions provided in spec")); err != nil { + return ctrl.Result{}, err + } + return ctrl.Result{}, nil + } + + // Process each decision individually + results := make([]v1alpha1.SchedulingDecisionResult, 0, len(res.Spec.Decisions)) + + for _, decision := range res.Spec.Decisions { + // Validate input has at least one host for this decision + if err := r.validateInput(decision.Input); err != nil { + if err := r.setErrorState(ctx, &res, fmt.Errorf("Decision %s: %v", decision.ID, err)); err != nil { + return ctrl.Result{}, err + } + return ctrl.Result{}, nil + } + + // Validate that all hosts in pipeline outputs exist in input for this decision + if err := r.validatePipelineHosts(decision.Input, decision.Pipeline.Outputs); err != nil { + if err := r.setErrorState(ctx, &res, fmt.Errorf("Decision %s: %v", decision.ID, err)); err != nil { + return ctrl.Result{}, err + } + return ctrl.Result{}, nil + } + + finalScores, deletedHosts := r.calculateScores(decision.Input, 
decision.Pipeline.Outputs) + + stepImpacts := r.calculateStepImpacts(decision.Input, decision.Pipeline.Outputs, finalScores) + + criticalSteps, criticalStepCount := r.findCriticalSteps(decision.Input, decision.Pipeline.Outputs, finalScores) + + orderedScores, description := r.generateOrderedScoresAndDescription(finalScores, decision.Input, criticalSteps, criticalStepCount, len(decision.Pipeline.Outputs), stepImpacts) + + result := v1alpha1.SchedulingDecisionResult{ + ID: decision.ID, + Description: description, + FinalScores: orderedScores, + DeletedHosts: deletedHosts, + } + results = append(results, result) + } + + globalDescription := r.generateGlobalDescription(results, res.Spec.Decisions) + + res.Status.State = v1alpha1.SchedulingDecisionStateResolved + res.Status.Error = "" + res.Status.DecisionCount = len(res.Spec.Decisions) + res.Status.GlobalDescription = globalDescription + res.Status.Results = results + + if err := r.Status().Update(ctx, &res); err != nil { + // Handle the case where resource was deleted during processing + if client.IgnoreNotFound(err) != nil { + // If it's a conflict error, just log and ignore - resource was modified concurrently + if apierrors.IsConflict(err) { + log := logf.FromContext(ctx) + log.Info("Resource was modified during processing, ignoring conflict", "name", res.Name, "error", err.Error()) + return ctrl.Result{}, nil + } + return ctrl.Result{}, err + } + // Resource was deleted (e.g., by TTL controller), nothing to update + return ctrl.Result{}, nil + } + + log := logf.FromContext(ctx) + log.Info("Updated SchedulingDecision", "name", res.Name, "decisions", len(res.Spec.Decisions)) + + return ctrl.Result{}, nil // No need to requeue. 
+} + +func (r *SchedulingDecisionReconciler) validateInput(input map[string]float64) error { + if len(input) == 0 { + return fmt.Errorf("No hosts provided in input") + } + return nil +} + +// validatePipelineHosts checks if all hosts in pipeline outputs exist in input +func (r *SchedulingDecisionReconciler) validatePipelineHosts(input map[string]float64, outputs []v1alpha1.SchedulingDecisionPipelineOutputSpec) error { + for _, output := range outputs { + for hostName := range output.Activations { + if _, exists := input[hostName]; !exists { + return fmt.Errorf("Host '%s' in pipeline output not found in input", hostName) + } + } + } + return nil +} + +// setErrorState sets the error state and updates the resource status +func (r *SchedulingDecisionReconciler) setErrorState(ctx context.Context, res *v1alpha1.SchedulingDecision, err error) error { + res.Status.State = v1alpha1.SchedulingDecisionStateError + res.Status.Error = err.Error() + + log := logf.FromContext(ctx) + log.Error(err, "Updated SchedulingDecision with error", "name", res.Name) + + return r.Status().Update(ctx, res) +} + +// findWinner returns the host with the highest score and the score value +func findWinner(scores map[string]float64) (string, float64) { + if len(scores) == 0 { + return "", MinScoreValue + } + + winner := "" + maxScore := float64(MinScoreValue) + for host, score := range scores { + if score > maxScore { + maxScore = score + winner = host + } + } + return winner, maxScore +} + +// calculateScores processes pipeline outputs and returns final scores and deleted hosts +func (r *SchedulingDecisionReconciler) calculateScores(input map[string]float64, outputs []v1alpha1.SchedulingDecisionPipelineOutputSpec) (map[string]float64, map[string][]string) { + finalScores := make(map[string]float64, len(input)) + deletedHosts := make(map[string][]string) + + // Start with input values as initial scores + for hostName, inputValue := range input { + finalScores[hostName] = inputValue + } + + // 
Process each pipeline step sequentially + for _, output := range outputs { + // Check which hosts will be deleted in this step + for hostName := range finalScores { + if _, exists := output.Activations[hostName]; !exists { + // Host not in this step's activations - will be deleted + deletedHosts[hostName] = append(deletedHosts[hostName], output.Step) + } + } + + // Apply activations and remove hosts not in this step + for hostName := range finalScores { + if activation, exists := output.Activations[hostName]; exists { + // Add activation to current score + finalScores[hostName] = finalScores[hostName] + activation + } else { + // Host not in this step - remove it + delete(finalScores, hostName) + } + } + } + + return finalScores, deletedHosts +} + +// findCriticalSteps determines which steps change the winning host using backward elimination +func (r *SchedulingDecisionReconciler) findCriticalSteps(input map[string]float64, outputs []v1alpha1.SchedulingDecisionPipelineOutputSpec, baselineFinalScores map[string]float64) ([]string, int) { + if len(outputs) == 0 { + return []string{}, 0 + } + + // Get baseline winner + baselineWinner, _ := findWinner(baselineFinalScores) + if baselineWinner == "" { + return []string{}, 0 + } + + criticalSteps := make([]string, 0) + + // Try removing each step one by one + for i, stepToRemove := range outputs { + // Create pipeline without this step using slice operations + reducedOutputs := make([]v1alpha1.SchedulingDecisionPipelineOutputSpec, 0, len(outputs)-1) + reducedOutputs = append(reducedOutputs, outputs[:i]...) + reducedOutputs = append(reducedOutputs, outputs[i+1:]...) 
+ + // Calculate scores without this step + reducedFinalScores, _ := r.calculateScores(input, reducedOutputs) + + // Find winner without this step + reducedWinner, _ := findWinner(reducedFinalScores) + + // If removing this step changes the winner, it's critical + if reducedWinner != baselineWinner { + criticalSteps = append(criticalSteps, stepToRemove.Step) + } + } + + return criticalSteps, len(criticalSteps) +} + +// StepImpact represents the impact of a single pipeline step on the winning host +type StepImpact struct { + Step string + ScoreBefore float64 + ScoreAfter float64 + ScoreDelta float64 + CompetitorsRemoved int + PromotedToFirst bool +} + +// calculateStepImpacts tracks how each pipeline step affects the final winner +func (r *SchedulingDecisionReconciler) calculateStepImpacts(input map[string]float64, outputs []v1alpha1.SchedulingDecisionPipelineOutputSpec, finalScores map[string]float64) []StepImpact { + if len(finalScores) == 0 || len(outputs) == 0 { + return []StepImpact{} + } + + // Find the final winner + finalWinner, _ := findWinner(finalScores) + if finalWinner == "" { + return []StepImpact{} + } + + stepImpacts := make([]StepImpact, 0, len(outputs)) + currentScores := make(map[string]float64) + + // Start with input values as initial scores + for hostName, inputValue := range input { + currentScores[hostName] = inputValue + } + + // Track score before first step + scoreBefore := currentScores[finalWinner] + + // Process each pipeline step and track the winner's evolution + for _, output := range outputs { + // Count how many competitors will be removed in this step + competitorsRemoved := 0 + for hostName := range currentScores { + if hostName != finalWinner { + if _, exists := output.Activations[hostName]; !exists { + competitorsRemoved++ + } + } + } + + // Check if winner was #1 before this step + wasFirst := true + winnerScoreBefore := currentScores[finalWinner] + for host, score := range currentScores { + if host != finalWinner && score > 
winnerScoreBefore { + wasFirst = false + break + } + } + + // Apply activations and remove hosts not in this step + newScores := make(map[string]float64) + for hostName, score := range currentScores { + if activation, exists := output.Activations[hostName]; exists { + newScores[hostName] = score + activation + } + // Hosts not in activations are removed (don't copy to newScores) + } + + // Get winner's score after this step + scoreAfter := newScores[finalWinner] + + // Check if winner became #1 after this step + isFirstAfter := true + for host, score := range newScores { + if host != finalWinner && score > scoreAfter { + isFirstAfter = false + break + } + } + + promotedToFirst := !wasFirst && isFirstAfter + + stepImpacts = append(stepImpacts, StepImpact{ + Step: output.Step, + ScoreBefore: scoreBefore, + ScoreAfter: scoreAfter, + ScoreDelta: scoreAfter - scoreBefore, + CompetitorsRemoved: competitorsRemoved, + PromotedToFirst: promotedToFirst, + }) + + // Update for next iteration + currentScores = newScores + scoreBefore = scoreAfter + } + + return stepImpacts +} + +// generateOrderedScoresAndDescription sorts final scores by value (highest to lowest) +// and generates a brief description with highest host, certainty, host count, input comparison, step impacts, and critical path +func (r *SchedulingDecisionReconciler) generateOrderedScoresAndDescription(finalScores map[string]float64, inputScores map[string]float64, criticalSteps []string, criticalStepCount int, totalSteps int, stepImpacts []StepImpact) (map[string]float64, string) { + totalInputHosts := len(inputScores) + if len(finalScores) == 0 { + return finalScores, fmt.Sprintf(noHostsRemainingFmt, totalInputHosts) + } + + // Sort final scores by value (highest to lowest) + sortedHosts := mapToSortedHostScores(finalScores) + + // Create ordered map (Go maps maintain insertion order as of Go 1.8+) + orderedScores := make(map[string]float64) + for _, hs := range sortedHosts { + orderedScores[hs.host] = hs.score 
+ } + + // Sort input scores to determine input-based ranking + sortedInputHosts := mapToSortedHostScores(inputScores) + + // Find positions and generate comparison + finalWinner := sortedHosts[0].host + inputWinner := sortedInputHosts[0].host + finalWinnerInputScore := inputScores[finalWinner] + + // Find final winner's position in input ranking + finalWinnerInputPosition := findHostPosition(sortedInputHosts, finalWinner) + + // Generate main description + var description string + if len(sortedHosts) == 1 { + description = fmt.Sprintf(selectedPerfectFmt, sortedHosts[0].host, sortedHosts[0].score, totalInputHosts) + } else { + // Calculate certainty based on gap between 1st and 2nd place + gap := sortedHosts[0].score - sortedHosts[1].score + certainty := getCertaintyLevel(gap) + description = fmt.Sprintf(selectedCertaintyFmt, sortedHosts[0].host, sortedHosts[0].score, certainty, gap, totalInputHosts) + } + + // Add input vs. final comparison + var comparison string + if inputWinner == finalWinner { + // Input choice confirmed + comparison = fmt.Sprintf(inputConfirmedFmt, finalWinner, finalWinnerInputScore, sortedHosts[0].score) + } else { + // Input winner different from final winner + inputWinnerScore := sortedInputHosts[0].score + + // Check if input winner was filtered out + _, inputWinnerSurvived := finalScores[inputWinner] + if !inputWinnerSurvived { + comparison = fmt.Sprintf(inputFilteredFmt, inputWinner, inputWinnerScore, finalWinnerInputPosition, finalWinnerInputScore, sortedHosts[0].score) + } else { + // Find input winner's position in final ranking + inputWinnerFinalPosition := findHostPosition(sortedHosts, inputWinner) + comparison = fmt.Sprintf(inputDemotedFmt, inputWinner, inputWinnerScore, inputWinnerFinalPosition, finalScores[inputWinner], + finalWinnerInputPosition, finalWinnerInputScore, sortedHosts[0].score) + } + } + + // Add step impact analysis for the winner using multi-line format + var stepImpactInfo string + if len(stepImpacts) > 0 { + 
stepImpactInfo = r.formatStepImpactsMultiLine(stepImpacts) + } + + // Add critical path information + var criticalPath string + if totalSteps > 0 { + if criticalStepCount == 0 { + criticalPath = fmt.Sprintf(" Decision driven by input only (all %d steps are non-critical).", totalSteps) + } else if criticalStepCount == totalSteps { + criticalPath = fmt.Sprintf(" Decision requires all %d pipeline steps.", totalSteps) + } else { + if criticalStepCount == 1 { + criticalPath = fmt.Sprintf(" Decision driven by 1/%d pipeline step: %s.", totalSteps, criticalSteps[0]) + } else { + // Join critical steps with proper separators + var stepList string + if len(criticalSteps) == 2 { + stepList = strings.Join(criticalSteps, " and ") + } else { + // For 3+ steps: "step1, step2, and step3" + lastStep := criticalSteps[len(criticalSteps)-1] + otherSteps := criticalSteps[:len(criticalSteps)-1] + stepList = strings.Join(otherSteps, ", ") + " and " + lastStep + } + criticalPath = fmt.Sprintf(" Decision driven by %d/%d pipeline steps: %s.", criticalStepCount, totalSteps, stepList) + } + } + } + + description += comparison + criticalPath + stepImpactInfo + return orderedScores, description +} + +// formatImpactValue formats a single step impact value +func formatImpactValue(impact StepImpact) string { + if impact.PromotedToFirst { + return fmt.Sprintf("%+.2f→#1", impact.ScoreDelta) + } + if impact.ScoreDelta != 0 { + return fmt.Sprintf("%+.2f", impact.ScoreDelta) + } + if impact.CompetitorsRemoved > 0 { + return fmt.Sprintf("+0.00 (removed %d)", impact.CompetitorsRemoved) + } + return "+0.00" +} + +// formatStepImpactsMultiLine formats step impacts in a simple delta-ordered format +// without confusing terminology, ordered by absolute impact magnitude +func (r *SchedulingDecisionReconciler) formatStepImpactsMultiLine(stepImpacts []StepImpact) string { + if len(stepImpacts) == 0 { + return "" + } + + // Sort by absolute delta impact (highest first), with promotions taking priority for ties 
+ sort.Slice(stepImpacts, func(i, j int) bool { + absI, absJ := math.Abs(stepImpacts[i].ScoreDelta), math.Abs(stepImpacts[j].ScoreDelta) + if absI != absJ { + return absI > absJ + } + if stepImpacts[i].PromotedToFirst != stepImpacts[j].PromotedToFirst { + return stepImpacts[i].PromotedToFirst + } + return stepImpacts[i].Step < stepImpacts[j].Step + }) + + var b strings.Builder + b.WriteString(" Step impacts:") + for _, impact := range stepImpacts { + fmt.Fprintf(&b, "\n• %s %s", impact.Step, formatImpactValue(impact)) + } + return b.String() + "." +} + +// hostSegment represents a segment in the host chain with duration and decision count +type hostSegment struct { + host string + duration time.Duration + decisions int +} + +// formatDuration formats a duration in a simple d/h/m format +func formatDuration(d time.Duration) string { + if d >= 24*time.Hour { + return fmt.Sprintf("%dd", int(d.Hours()/24)) + } + if d >= time.Hour { + return fmt.Sprintf("%dh", int(d.Hours())) + } + return fmt.Sprintf("%dm", int(d.Minutes())) +} + +// generateGlobalDescription creates a global description for decisions +// showing the host chain with durations and detecting simple loops +func (r *SchedulingDecisionReconciler) generateGlobalDescription(results []v1alpha1.SchedulingDecisionResult, decisions []v1alpha1.SchedulingDecisionRequest) string { + if len(results) == 0 { + return "" // No decisions to describe + } + + // Extract host chain from winners + hostChain := make([]string, 0, len(results)) + for _, result := range results { + winner, _ := findWinner(result.FinalScores) + hostChain = append(hostChain, winner) + } + + // Build segments with durations in one pass + segments := make([]hostSegment, 0) + if len(hostChain) > 0 { + currentHost := hostChain[0] + segmentStart := 0 + + for i := 1; i <= len(hostChain); i++ { + // Check if we've reached the end or found a different host + if i == len(hostChain) || hostChain[i] != currentHost { + // Calculate duration for this segment + 
startTime := decisions[segmentStart].RequestedAt.Time + var endTime time.Time + if i == len(hostChain) { + // For the last segment, use the same time as start time (0 duration) + endTime = startTime + } else { + endTime = decisions[i].RequestedAt.Time + } + + segments = append(segments, hostSegment{ + host: currentHost, + duration: endTime.Sub(startTime), + decisions: i - segmentStart, + }) + + if i < len(hostChain) { + currentHost = hostChain[i] + segmentStart = i + } + } + } + } + + // Build chain string with durations + chainParts := make([]string, 0, len(segments)) + for _, segment := range segments { + part := segment.host + " (" + formatDuration(segment.duration) + if segment.decisions > 1 { + part += fmt.Sprintf("; %d decisions", segment.decisions) + } + part += ")" + chainParts = append(chainParts, part) + } + + hasLoop := false + seenHosts := make(map[string]bool) + for segment := range segments { + if seenHosts[segments[segment].host] { + hasLoop = true + break + } + seenHosts[segments[segment].host] = true + } + + chainStr := strings.Join(chainParts, " -> ") + if hasLoop { + return fmt.Sprintf("chain (loop detected): %s", chainStr) + } else { + return fmt.Sprintf("chain: %s", chainStr) + } +} + +// SetupWithManager sets up the controller with the Manager. +func (r *SchedulingDecisionReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + For(&decisionsv1alpha1.SchedulingDecision{}). + Named("schedulingdecision"). + WithOptions(controller.Options{ + MaxConcurrentReconciles: 1, // Default + }). + WithEventFilter(predicate.And( + predicate.GenerationChangedPredicate{}, + noDeleteEventsPredicate{}, + )). 
+ Complete(r) +} diff --git a/decisions/internal/controller/controller_test.go b/decisions/internal/controller/controller_test.go new file mode 100644 index 00000000..5d65a7b9 --- /dev/null +++ b/decisions/internal/controller/controller_test.go @@ -0,0 +1,1091 @@ +// Copyright 2025 SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package controller + +import ( + "fmt" + "testing" + "time" + + "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1" +) + +func TestReconcile(t *testing.T) { + // Create test decision with pipeline outputs + decision := NewTestDecision("decision-1"). + WithInput(map[string]float64{ + "host1": 1.0, + "host2": 2.0, + }). + WithPipelineOutputs( + NewTestPipelineOutput("weigher", map[string]float64{ + "host1": 0.5, + "host2": 0.5, + }), + NewTestPipelineOutput("filter", map[string]float64{ + "host1": 0.0, + }), + ). + Build() + + resource := NewTestSchedulingDecision("test-decision"). + WithDecisions(decision). + Build() + + fakeClient, _ := SetupTestEnvironment(t, resource) + req := CreateTestRequest("test-decision") + + reconciler := CreateSchedulingReconciler(fakeClient) + _, err := reconciler.Reconcile(t.Context(), req) + if err != nil { + t.Fatalf("Reconcile returned an error: %v", err) + } + + // Fetch and verify the updated resource + updatedResource := AssertResourceExists(t, fakeClient, "test-decision") + AssertResourceState(t, updatedResource, v1alpha1.SchedulingDecisionStateResolved) + AssertNoError(t, updatedResource) + AssertDecisionCount(t, updatedResource, 1) + AssertResultCount(t, updatedResource, 1) + + result := updatedResource.Status.Results[0] + if result.ID != "decision-1" { + t.Errorf("Expected result ID 'decision-1', got '%s'", result.ID) + } + + expectedDescription := "Selected: host1 (score: 1.50), certainty: perfect, 2 hosts evaluated. Input favored host2 (score: 2.00, now filtered), final winner was #2 in input (1.00→1.50). Decision driven by 1/2 pipeline step: filter. 
Step impacts:\n• weigher +0.50\n• filter +0.00→#1." + if result.Description != expectedDescription { + t.Errorf("Expected description '%s', got '%s'", expectedDescription, result.Description) + } + + // Verify final scores calculation + // Expected: host1: 1.0 + 0.5 + 0.0 = 1.5, host2: removed by filter step + expectedFinalScores := map[string]float64{ + "host1": 1.5, + } + AssertFinalScores(t, result, expectedFinalScores) + + // Verify deleted hosts tracking + expectedDeletedHosts := map[string][]string{ + "host2": {"filter"}, // host2 was deleted by the filter step + } + AssertDeletedHosts(t, result, expectedDeletedHosts) + + t.Logf("Reconcile completed successfully: state=%s, finalScores=%v, deletedHosts=%v", + updatedResource.Status.State, result.FinalScores, result.DeletedHosts) +} + +func TestReconcileEmptyInput(t *testing.T) { + // Create test decision with empty input + decision := NewTestDecision("decision-1"). + WithInput(map[string]float64{}). // Empty input - no hosts + WithPipelineOutputs( + NewTestPipelineOutput("weigher", map[string]float64{ + "host1": 0.5, + "host2": 0.5, + }), + ). + Build() + + resource := NewTestSchedulingDecision("test-decision-empty-input"). + WithDecisions(decision). 
+ Build() + + fakeClient, _ := SetupTestEnvironment(t, resource) + req := CreateTestRequest("test-decision-empty-input") + + reconciler := CreateSchedulingReconciler(fakeClient) + _, err := reconciler.Reconcile(t.Context(), req) + if err != nil { + t.Fatalf("Reconcile returned an error: %v", err) + } + + // Fetch and verify the updated resource + updatedResource := AssertResourceExists(t, fakeClient, "test-decision-empty-input") + AssertResourceState(t, updatedResource, v1alpha1.SchedulingDecisionStateError) + AssertResourceError(t, updatedResource, "Decision decision-1: No hosts provided in input") + + t.Logf("Reconcile completed with error: state=%s, error=%s", updatedResource.Status.State, updatedResource.Status.Error) +} + +func TestReconcileHostMismatch(t *testing.T) { + // Create test decision with host mismatch (host3 in pipeline but not in input) + decision := NewTestDecision("decision-1"). + WithInput(map[string]float64{ + "host1": 1.0, + "host2": 2.0, + }). + WithPipelineOutputs( + NewTestPipelineOutput("weigher", map[string]float64{ + "host1": 0.5, + "host3": 0.3, // host3 doesn't exist in input + }), + ). + Build() + + resource := NewTestSchedulingDecision("test-decision-host-mismatch"). + WithDecisions(decision). 
+ Build() + + fakeClient, _ := SetupTestEnvironment(t, resource) + req := CreateTestRequest("test-decision-host-mismatch") + + reconciler := CreateSchedulingReconciler(fakeClient) + _, err := reconciler.Reconcile(t.Context(), req) + if err != nil { + t.Fatalf("Reconcile returned an error: %v", err) + } + + // Fetch and verify the updated resource + updatedResource := AssertResourceExists(t, fakeClient, "test-decision-host-mismatch") + AssertResourceState(t, updatedResource, v1alpha1.SchedulingDecisionStateError) + AssertResourceError(t, updatedResource, "Decision decision-1: Host 'host3' in pipeline output not found in input") + + t.Logf("Reconcile completed with host mismatch error: state=%s, error=%s", updatedResource.Status.State, updatedResource.Status.Error) +} + +func TestReconcileComplexScoring(t *testing.T) { + // Create test decision with complex multi-step pipeline + decision := NewTestDecision("decision-1"). + WithInput(map[string]float64{ + "host1": 1.0, + "host2": 2.0, + "host3": 3.0, + "host4": 4.0, + }). + WithPipelineOutputs( + NewTestPipelineOutput("weigher1", map[string]float64{ + "host1": 0.5, + "host2": 1.0, + "host3": -0.5, + "host4": 2.0, + }), + NewTestPipelineOutput("filter1", map[string]float64{ + "host1": 0.2, + "host3": 0.1, // host2 and host4 removed by this step + }), + NewTestPipelineOutput("weigher2", map[string]float64{ + "host1": -0.3, // host3 removed by this step + }), + ). + Build() + + resource := NewTestSchedulingDecision("test-decision-complex"). + WithDecisions(decision). 
+ Build() + + fakeClient, _ := SetupTestEnvironment(t, resource) + req := CreateTestRequest("test-decision-complex") + + reconciler := CreateSchedulingReconciler(fakeClient) + _, err := reconciler.Reconcile(t.Context(), req) + if err != nil { + t.Fatalf("Reconcile returned an error: %v", err) + } + + // Fetch and verify the updated resource + updatedResource := AssertResourceExists(t, fakeClient, "test-decision-complex") + AssertResourceState(t, updatedResource, v1alpha1.SchedulingDecisionStateResolved) + AssertResultCount(t, updatedResource, 1) + + result := updatedResource.Status.Results[0] + if result.ID != "decision-1" { + t.Errorf("Expected result ID 'decision-1', got '%s'", result.ID) + } + + // Verify final scores calculation + // Expected: host1: 1.0 + 0.5 + 0.2 + (-0.3) = 1.4 + // host2: removed by filter1, host3: removed by weigher2, host4: removed by filter1 + expectedFinalScores := map[string]float64{ + "host1": 1.4, + } + AssertFinalScores(t, result, expectedFinalScores) + + // Verify deleted hosts tracking + expectedDeletedHosts := map[string][]string{ + "host2": {"filter1"}, // host2 deleted by filter1 + "host4": {"filter1"}, // host4 deleted by filter1 + "host3": {"weigher2"}, // host3 deleted by weigher2 + } + AssertDeletedHosts(t, result, expectedDeletedHosts) + + t.Logf("Complex scoring completed: finalScores=%v, deletedHosts=%v", + result.FinalScores, result.DeletedHosts) +} + +func TestReconcileMultipleDeletionSteps(t *testing.T) { + // Create test decision with multiple filter steps that remove all hosts + decision := NewTestDecision("decision-1"). + WithInput(map[string]float64{ + "host1": 1.0, + "host2": 2.0, + "host3": 3.0, + }). 
+ WithPipelineOutputs( + NewTestPipelineOutput("weigher1", map[string]float64{ + "host1": 0.5, + "host2": 1.0, + "host3": -0.5, + }), + NewTestPipelineOutput("filter1", map[string]float64{ + "host1": 0.2, + // host2 and host3 removed by this step + }), + NewTestPipelineOutput("filter2", map[string]float64{ + // host1 removed by this step + // host2 and host3 would be removed again, but they're already gone + }), + ). + Build() + + resource := NewTestSchedulingDecision("test-decision-multiple-deletions"). + WithDecisions(decision). + Build() + + fakeClient, _ := SetupTestEnvironment(t, resource) + req := CreateTestRequest("test-decision-multiple-deletions") + + reconciler := CreateSchedulingReconciler(fakeClient) + _, err := reconciler.Reconcile(t.Context(), req) + if err != nil { + t.Fatalf("Reconcile returned an error: %v", err) + } + + // Fetch and verify the updated resource + updatedResource := AssertResourceExists(t, fakeClient, "test-decision-multiple-deletions") + AssertResourceState(t, updatedResource, v1alpha1.SchedulingDecisionStateResolved) + AssertResultCount(t, updatedResource, 1) + + result := updatedResource.Status.Results[0] + if result.ID != "decision-1" { + t.Errorf("Expected result ID 'decision-1', got '%s'", result.ID) + } + + // Verify final scores calculation - all hosts should be removed, no final scores + expectedFinalScores := map[string]float64{} + AssertFinalScores(t, result, expectedFinalScores) + + // Verify deleted hosts tracking + // host2 and host3 deleted by filter1, host1 deleted by filter2 + expectedDeletedHosts := map[string][]string{ + "host2": {"filter1"}, // host2 deleted by filter1 + "host3": {"filter1"}, // host3 deleted by filter1 + "host1": {"filter2"}, // host1 deleted by filter2 + } + AssertDeletedHosts(t, result, expectedDeletedHosts) + + t.Logf("Multiple deletion test completed: finalScores=%v, deletedHosts=%v", + result.FinalScores, result.DeletedHosts) +} + +func TestReconcileCertaintyLevels(t *testing.T) { + tests 
:= []struct { + name string + input map[string]float64 + activations map[string]float64 + expectedWinner string + expectedCertainty string + }{ + { + name: "high-certainty", + input: map[string]float64{ + "host1": 1.0, + "host2": 1.0, + }, + activations: map[string]float64{ + "host1": 1.0, // host1: 2.0, host2: 1.0, gap = 1.0 (high) + "host2": 0.0, + }, + expectedWinner: "host1", + expectedCertainty: "high", + }, + { + name: "medium-certainty", + input: map[string]float64{ + "host1": 1.0, + "host2": 1.0, + }, + activations: map[string]float64{ + "host1": 0.3, // host1: 1.3, host2: 1.0, gap = 0.3 (medium) + "host2": 0.0, + }, + expectedWinner: "host1", + expectedCertainty: "medium", + }, + { + name: "low-certainty", + input: map[string]float64{ + "host1": 1.0, + "host2": 1.0, + }, + activations: map[string]float64{ + "host1": 0.1, // host1: 1.1, host2: 1.0, gap = 0.1 (low) + "host2": 0.0, + }, + expectedWinner: "host1", + expectedCertainty: "low", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create test decision with specific activations to test certainty levels + decision := NewTestDecision("decision-1"). + WithInput(tt.input). + WithPipelineOutputs( + NewTestPipelineOutput("weigher", tt.activations), + ). + Build() + + resource := NewTestSchedulingDecision("test-certainty-" + tt.name). + WithDecisions(decision). 
+ Build() + + fakeClient, _ := SetupTestEnvironment(t, resource) + req := CreateTestRequest("test-certainty-" + tt.name) + + reconciler := CreateSchedulingReconciler(fakeClient) + _, err := reconciler.Reconcile(t.Context(), req) + if err != nil { + t.Fatalf("Reconcile returned an error: %v", err) + } + + // Fetch and verify the updated resource + updatedResource := AssertResourceExists(t, fakeClient, "test-certainty-"+tt.name) + AssertResultCount(t, updatedResource, 1) + + result := updatedResource.Status.Results[0] + if result.ID != "decision-1" { + t.Errorf("Expected result ID 'decision-1', got '%s'", result.ID) + } + + // Verify the description contains the expected winner and certainty + AssertDescriptionContains(t, result.Description, + "Selected: "+tt.expectedWinner, + "certainty: "+tt.expectedCertainty, + ) + + t.Logf("Certainty test %s completed: %s", tt.name, result.Description) + }) + } +} + +func TestReconcileNoHostsRemaining(t *testing.T) { + // Create test decision where all hosts are filtered out + decision := NewTestDecision("decision-1"). + WithInput(map[string]float64{ + "host1": 1.0, + "host2": 2.0, + }). + WithPipelineOutputs( + NewTestPipelineOutput("filter-all", map[string]float64{ + // No hosts in activations - all will be filtered out + }), + ). + Build() + + resource := NewTestSchedulingDecision("test-no-hosts-remaining"). + WithDecisions(decision). 
+ Build() + + fakeClient, _ := SetupTestEnvironment(t, resource) + req := CreateTestRequest("test-no-hosts-remaining") + + reconciler := CreateSchedulingReconciler(fakeClient) + _, err := reconciler.Reconcile(t.Context(), req) + if err != nil { + t.Fatalf("Reconcile returned an error: %v", err) + } + + // Fetch and verify the updated resource + updatedResource := AssertResourceExists(t, fakeClient, "test-no-hosts-remaining") + AssertResourceState(t, updatedResource, v1alpha1.SchedulingDecisionStateResolved) + AssertResultCount(t, updatedResource, 1) + + result := updatedResource.Status.Results[0] + if result.ID != "decision-1" { + t.Errorf("Expected result ID 'decision-1', got '%s'", result.ID) + } + + // Verify no final scores since all hosts were filtered out + expectedFinalScores := map[string]float64{} + AssertFinalScores(t, result, expectedFinalScores) + + expectedDescription := "No hosts remaining after filtering, 2 hosts evaluated" + if result.Description != expectedDescription { + t.Errorf("Expected description '%s', got '%s'", expectedDescription, result.Description) + } + + t.Logf("No hosts remaining test completed: %s", result.Description) +} + +func TestReconcileInputVsFinalComparison(t *testing.T) { + tests := []struct { + name string + input map[string]float64 + activations map[string]float64 + expectedDescContains []string + }{ + { + name: "input-choice-confirmed", + input: map[string]float64{ + "host1": 3.0, // highest in input + "host2": 2.0, + "host3": 1.0, + }, + activations: map[string]float64{ + "host1": 0.5, "host2": 0.3, "host3": 0.1, // host1 stays winner + }, + expectedDescContains: []string{ + "Selected: host1", + "Input choice confirmed: host1 (3.00→3.50, remained #1)", + }, + }, + { + name: "input-winner-filtered", + input: map[string]float64{ + "host1": 1.0, + "host2": 3.0, // highest in input + "host3": 2.0, + }, + activations: map[string]float64{ + "host1": 0.5, "host3": 0.3, // host2 filtered out, host3 becomes winner + }, + 
expectedDescContains: []string{ + "Selected: host3", + "Input favored host2 (score: 3.00, now filtered)", + "final winner was #2 in input (2.00→2.30)", + }, + }, + { + name: "input-winner-demoted", + input: map[string]float64{ + "host1": 1.0, + "host2": 3.0, // highest in input + "host3": 2.0, + }, + activations: map[string]float64{ + "host1": 2.5, "host2": -0.5, "host3": 0.8, // host1 becomes winner, host2 demoted to #3 + }, + expectedDescContains: []string{ + "Selected: host1", + "Input favored host2 (score: 3.00, now #3 with 2.50)", + "final winner was #3 in input (1.00→3.50)", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create test decision to compare input vs final rankings + decision := NewTestDecision("decision-1"). + WithInput(tt.input). + WithPipelineOutputs( + NewTestPipelineOutput("weigher", tt.activations), + ). + Build() + + resource := NewTestSchedulingDecision("test-input-vs-final-" + tt.name). + WithDecisions(decision). + Build() + + fakeClient, _ := SetupTestEnvironment(t, resource) + req := CreateTestRequest("test-input-vs-final-" + tt.name) + + reconciler := CreateSchedulingReconciler(fakeClient) + _, err := reconciler.Reconcile(t.Context(), req) + if err != nil { + t.Fatalf("Reconcile returned an error: %v", err) + } + + // Fetch and verify the updated resource + updatedResource := AssertResourceExists(t, fakeClient, "test-input-vs-final-"+tt.name) + AssertResultCount(t, updatedResource, 1) + + result := updatedResource.Status.Results[0] + if result.ID != "decision-1" { + t.Errorf("Expected result ID 'decision-1', got '%s'", result.ID) + } + + // Verify the description contains expected elements + AssertDescriptionContains(t, result.Description, tt.expectedDescContains...) 
+ + t.Logf("Input vs Final test %s completed: %s", tt.name, result.Description) + }) + } +} + +func TestReconcileCriticalStepElimination(t *testing.T) { + tests := []struct { + name string + input map[string]float64 + pipelineOutputs []v1alpha1.SchedulingDecisionPipelineOutputSpec + expectedCriticalMessage string + }{ + { + name: "single-critical-step", + input: map[string]float64{ + "host1": 2.0, // Would win without pipeline + "host2": 1.0, + "host3": 1.5, + }, + pipelineOutputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ + { + Step: "non-critical-weigher", + Activations: map[string]float64{ + "host1": 0.1, // Small changes don't affect winner + "host2": 0.1, + "host3": 0.1, + }, + }, + { + Step: "critical-filter", + Activations: map[string]float64{ + "host2": 0.0, // host1 is absent here and thus filtered out; host3 (1.6) should then beat host2 (1.1) + "host3": 0.0, + }, + }, + }, + expectedCriticalMessage: "Decision driven by 1/2 pipeline step: critical-filter.", + }, + { + name: "multiple-critical-steps", + input: map[string]float64{ + "host1": 1.0, + "host2": 3.0, // Strong initial winner + "host3": 2.0, + }, + pipelineOutputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ + { + Step: "critical-weigher1", + Activations: map[string]float64{ + "host1": 1.0, // host1: 2.0, host2: 2.5, host3: 2.5 (ties host2 and host3) + "host2": -0.5, + "host3": 0.5, + }, + }, + { + Step: "critical-weigher2", + Activations: map[string]float64{ + "host1": 1.0, // host1: 3.0, host2: 2.5, host3: 2.5 (host1 becomes winner) + "host2": 0.0, + "host3": 0.0, + }, + }, + }, + expectedCriticalMessage: "Decision requires all 2 pipeline steps.", + }, + { + name: "all-non-critical", + input: map[string]float64{ + "host1": 3.0, // Clear winner from input + "host2": 1.0, + "host3": 2.0, + }, + pipelineOutputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ + { + Step: "non-critical-weigher1", + Activations: map[string]float64{ + "host1": 0.1, // Small changes don't change winner + "host2": 0.1, + "host3": 0.1, + }, 
+ }, + { + Step: "non-critical-weigher2", + Activations: map[string]float64{ + "host1": 0.2, + "host2": 0.0, + "host3": 0.1, + }, + }, + }, + expectedCriticalMessage: "Decision driven by input only (all 2 steps are non-critical).", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create test decision with multiple pipeline steps to test critical step analysis + decision := NewTestDecision("decision-1"). + WithInput(tt.input). + WithPipelineOutputs(tt.pipelineOutputs...). + Build() + + resource := NewTestSchedulingDecision("test-critical-steps-" + tt.name). + WithDecisions(decision). + Build() + + fakeClient, _ := SetupTestEnvironment(t, resource) + req := CreateTestRequest("test-critical-steps-" + tt.name) + + reconciler := CreateSchedulingReconciler(fakeClient) + _, err := reconciler.Reconcile(t.Context(), req) + if err != nil { + t.Fatalf("Reconcile returned an error: %v", err) + } + + // Fetch and verify the updated resource + updatedResource := AssertResourceExists(t, fakeClient, "test-critical-steps-"+tt.name) + AssertResultCount(t, updatedResource, 1) + + result := updatedResource.Status.Results[0] + if result.ID != "decision-1" { + t.Errorf("Expected result ID 'decision-1', got '%s'", result.ID) + } + + // Verify the description contains the expected critical step message + AssertDescriptionContains(t, result.Description, tt.expectedCriticalMessage) + + t.Logf("Critical step test %s completed: %s", tt.name, result.Description) + }) + } +} + +func TestReconcileGlobalDescription(t *testing.T) { + tests := []struct { + name string + decisions []v1alpha1.SchedulingDecisionRequest + expectedGlobalDescription string + }{ + { + name: "single-decision-with-global", + decisions: []v1alpha1.SchedulingDecisionRequest{ + NewTestDecision("decision-1"). + WithInput(map[string]float64{"host1": 1.0, "host2": 2.0}). + WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host1": 1.5, "host2": 0.0})). 
+ Build(), + }, + expectedGlobalDescription: "chain: host1 (0m)", // Single decision shows chain with 0m duration - host1 wins with 2.5 vs host2 with 2.0 + }, + { + name: "simple-chain-no-loop", + decisions: []v1alpha1.SchedulingDecisionRequest{ + NewTestDecision("decision-1"). + WithRequestedAt(time.Now().Add(-5 * time.Hour)). + WithInput(map[string]float64{"host1": 1.0, "host2": 2.0}). + WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host1": 2.0, "host2": 0.0})). + Build(), + NewTestDecision("decision-2"). + WithRequestedAt(time.Now().Add(-3 * time.Hour)). + WithInput(map[string]float64{"host2": 1.0, "host3": 2.0}). + WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host2": 1.5, "host3": 0.0})). + Build(), + NewTestDecision("decision-3"). + WithRequestedAt(time.Now().Add(-1 * time.Hour)). + WithInput(map[string]float64{"host2": 1.0, "host3": 2.0}). + WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host2": 1.5, "host3": 0.0})). + Build(), + NewTestDecision("decision-4"). + WithRequestedAt(time.Now()). + WithInput(map[string]float64{"host3": 1.0, "host4": 2.0}). + WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host3": 0.0, "host4": 1.0})). + Build(), + }, + expectedGlobalDescription: "chain: host1 (2h) -> host2 (3h; 2 decisions) -> host4 (0m)", + }, + { + name: "chain-with-loop", + decisions: []v1alpha1.SchedulingDecisionRequest{ + NewTestDecision("decision-1"). + WithRequestedAt(time.Now().Add(-5 * time.Hour)). + WithInput(map[string]float64{"host1": 1.0, "host2": 2.0}). + WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host1": 2.0, "host2": 0.0})). + Build(), + NewTestDecision("decision-2"). + WithRequestedAt(time.Now().Add(-2 * time.Hour)). + WithInput(map[string]float64{"host1": 1.0, "host2": 2.0}). + WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host1": 0.0, "host2": 1.0})). 
+ Build(), + NewTestDecision("decision-3"). + WithRequestedAt(time.Now().Add(-1 * time.Hour)). + WithInput(map[string]float64{"host1": 1.0, "host2": 2.0}). + WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host1": 2.0, "host2": 0.0})). + Build(), + NewTestDecision("decision-4"). + WithRequestedAt(time.Now()). + WithInput(map[string]float64{"host3": 1.0}). + WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host3": 0.0})). + Build(), + }, + expectedGlobalDescription: "chain (loop detected): host1 (3h) -> host2 (1h) -> host1 (1h) -> host3 (0m)", + }, + { + name: "same-host-all-decisions-no-loop", + decisions: []v1alpha1.SchedulingDecisionRequest{ + NewTestDecision("decision-1"). + WithRequestedAt(time.Now().Add(-2 * time.Hour)). + WithInput(map[string]float64{"host1": 2.0, "host2": 1.0}). + WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host1": 1.0, "host2": 0.0})). + Build(), + NewTestDecision("decision-2"). + WithRequestedAt(time.Now()). + WithInput(map[string]float64{"host1": 2.0, "host3": 1.0}). + WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host1": 1.0, "host3": 0.0})). + Build(), + }, + expectedGlobalDescription: "chain: host1 (0m; 2 decisions)", // Last segment always shows 0m duration + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + resource := NewTestSchedulingDecision("test-global-" + tt.name). + WithDecisions(tt.decisions...). 
+ Build() + + fakeClient, _ := SetupTestEnvironment(t, resource) + req := CreateTestRequest("test-global-" + tt.name) + + reconciler := CreateSchedulingReconciler(fakeClient) + _, err := reconciler.Reconcile(t.Context(), req) + if err != nil { + t.Fatalf("Reconcile returned an error: %v", err) + } + + // Fetch and verify the updated resource + updatedResource := AssertResourceExists(t, fakeClient, "test-global-"+tt.name) + AssertResourceState(t, updatedResource, v1alpha1.SchedulingDecisionStateResolved) + AssertDecisionCount(t, updatedResource, len(tt.decisions)) + + // Verify global description + if updatedResource.Status.GlobalDescription != tt.expectedGlobalDescription { + t.Errorf("Expected global description '%s', got '%s'", + tt.expectedGlobalDescription, updatedResource.Status.GlobalDescription) + } + + t.Logf("Global description test %s completed: '%s'", tt.name, updatedResource.Status.GlobalDescription) + }) + } +} + +// TestReconcileEmptyDecisionsList tests the case where no decisions are provided +func TestReconcileEmptyDecisionsList(t *testing.T) { + resource := NewTestSchedulingDecision("test-empty-decisions"). + WithDecisions(). 
// No decisions provided + Build() + + fakeClient, _ := SetupTestEnvironment(t, resource) + req := CreateTestRequest("test-empty-decisions") + + reconciler := CreateSchedulingReconciler(fakeClient) + _, err := reconciler.Reconcile(t.Context(), req) + if err != nil { + t.Fatalf("Reconcile returned an error: %v", err) + } + + // Fetch and verify the updated resource + updatedResource := AssertResourceExists(t, fakeClient, "test-empty-decisions") + AssertResourceState(t, updatedResource, v1alpha1.SchedulingDecisionStateError) + AssertResourceError(t, updatedResource, "No decisions provided in spec") + + t.Logf("Empty decisions test completed: state=%s, error=%s", updatedResource.Status.State, updatedResource.Status.Error) +} + +// TestReconcileResourceNotFound tests the case where the resource is deleted during reconciliation +func TestReconcileResourceNotFound(t *testing.T) { + fakeClient, _ := SetupTestEnvironment(t) // No resource created + req := CreateTestRequest("non-existent-resource") + + reconciler := CreateSchedulingReconciler(fakeClient) + _, err := reconciler.Reconcile(t.Context(), req) + + // Should gracefully handle when resource is not found (no error) + // This can happen when TTL controller deletes a resource while main controller has queued reconcile request + if err != nil { + t.Fatalf("Expected no error when resource not found (should be handled gracefully), got: %v", err) + } + + t.Logf("Resource not found test completed: gracefully handled with no error") +} + +// TestUtilityFunctions tests the standalone utility functions +func TestUtilityFunctions(t *testing.T) { + t.Run("findWinner", func(t *testing.T) { + tests := []struct { + name string + scores map[string]float64 + expectedWinner string + expectedScore float64 + }{ + { + name: "empty-map", + scores: map[string]float64{}, + expectedWinner: "", + expectedScore: MinScoreValue, + }, + { + name: "single-host", + scores: map[string]float64{"host1": 5.0}, + expectedWinner: "host1", + 
expectedScore: 5.0, + }, + { + name: "clear-winner", + scores: map[string]float64{"host1": 3.0, "host2": 1.0, "host3": 2.0}, + expectedWinner: "host1", + expectedScore: 3.0, + }, + { + name: "tied-scores", + scores: map[string]float64{"host1": 2.0, "host2": 2.0}, + expectedWinner: "", // Don't check specific winner for tied scores (map iteration order is not deterministic) + expectedScore: 2.0, + }, + { + name: "negative-scores", + scores: map[string]float64{"host1": -1.0, "host2": -2.0}, + expectedWinner: "host1", + expectedScore: -1.0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + winner, score := findWinner(tt.scores) + if tt.expectedWinner != "" && winner != tt.expectedWinner { + t.Errorf("Expected winner '%s', got '%s'", tt.expectedWinner, winner) + } + if score != tt.expectedScore { + t.Errorf("Expected score %f, got %f", tt.expectedScore, score) + } + // For tied scores, just verify we got one of the tied hosts + if tt.name == "tied-scores" { + if winner != "host1" && winner != "host2" { + t.Errorf("Expected winner to be either 'host1' or 'host2', got '%s'", winner) + } + } + }) + } + }) + + t.Run("mapToSortedHostScores", func(t *testing.T) { + scores := map[string]float64{ + "host1": 1.0, + "host2": 3.0, + "host3": 2.0, + } + sorted := mapToSortedHostScores(scores) + + if len(sorted) != 3 { + t.Errorf("Expected 3 sorted hosts, got %d", len(sorted)) + } + + // Should be sorted by score descending + if sorted[0].host != "host2" || sorted[0].score != 3.0 { + t.Errorf("Expected first host to be host2 with score 3.0, got %s with %f", sorted[0].host, sorted[0].score) + } + if sorted[1].host != "host3" || sorted[1].score != 2.0 { + t.Errorf("Expected second host to be host3 with score 2.0, got %s with %f", sorted[1].host, sorted[1].score) + } + if sorted[2].host != "host1" || sorted[2].score != 1.0 { + t.Errorf("Expected third host to be host1 with score 1.0, got %s with %f", sorted[2].host, sorted[2].score) + } + }) + + 
t.Run("findHostPosition", func(t *testing.T) { + hosts := []hostScore{ + {host: "host2", score: 3.0}, + {host: "host3", score: 2.0}, + {host: "host1", score: 1.0}, + } + + tests := []struct { + targetHost string + expectedPosition int + }{ + {"host2", 1}, // First position + {"host3", 2}, // Second position + {"host1", 3}, // Third position + {"host4", -1}, // Not found + } + + for _, tt := range tests { + position := findHostPosition(hosts, tt.targetHost) + if position != tt.expectedPosition { + t.Errorf("Expected position %d for host %s, got %d", tt.expectedPosition, tt.targetHost, position) + } + } + }) + + t.Run("getCertaintyLevel", func(t *testing.T) { + tests := []struct { + gap float64 + expectedCertainty string + }{ + {1.0, "high"}, // >= 0.5 + {0.5, "high"}, // exactly 0.5 + {0.3, "medium"}, // >= 0.2, < 0.5 + {0.2, "medium"}, // exactly 0.2 + {0.1, "low"}, // >= 0.0, < 0.2 + {0.0, "low"}, // exactly 0.0 + {-0.1, "low"}, // < 0.0 + } + + for _, tt := range tests { + certainty := getCertaintyLevel(tt.gap) + if certainty != tt.expectedCertainty { + t.Errorf("Expected certainty '%s' for gap %f, got '%s'", tt.expectedCertainty, tt.gap, certainty) + } + } + }) +} + +// TestStepImpactAnalysis tests the step impact calculation logic +func TestStepImpactAnalysis(t *testing.T) { + reconciler := &SchedulingDecisionReconciler{} + + t.Run("promotion-scenarios", func(t *testing.T) { + input := map[string]float64{ + "host1": 1.0, // Will become winner + "host2": 3.0, // Initial winner + "host3": 2.0, + } + + outputs := []v1alpha1.SchedulingDecisionPipelineOutputSpec{ + { + Step: "promotion-step", + Activations: map[string]float64{ + "host1": 2.5, // host1: 3.5 (becomes winner) + "host2": -0.5, // host2: 2.5 (demoted) + "host3": 0.0, // host3: 2.0 + }, + }, + } + + finalScores := map[string]float64{ + "host1": 3.5, + "host2": 2.5, + "host3": 2.0, + } + + impacts := reconciler.calculateStepImpacts(input, outputs, finalScores) + + if len(impacts) != 1 { + 
t.Fatalf("Expected 1 step impact, got %d", len(impacts)) + } + + impact := impacts[0] + if impact.Step != "promotion-step" { + t.Errorf("Expected step 'promotion-step', got '%s'", impact.Step) + } + if !impact.PromotedToFirst { + t.Errorf("Expected PromotedToFirst to be true") + } + if impact.ScoreDelta != 2.5 { + t.Errorf("Expected ScoreDelta 2.5, got %f", impact.ScoreDelta) + } + if impact.CompetitorsRemoved != 0 { + t.Errorf("Expected CompetitorsRemoved 0, got %d", impact.CompetitorsRemoved) + } + }) + + t.Run("competitor-removal", func(t *testing.T) { + input := map[string]float64{ + "host1": 1.0, // Will become winner after competitors removed + "host2": 3.0, // Initial winner, will be removed + "host3": 2.0, // Will be removed + } + + outputs := []v1alpha1.SchedulingDecisionPipelineOutputSpec{ + { + Step: "filter-step", + Activations: map[string]float64{ + "host1": 0.0, // Only host1 survives + }, + }, + } + + finalScores := map[string]float64{ + "host1": 1.0, + } + + impacts := reconciler.calculateStepImpacts(input, outputs, finalScores) + + if len(impacts) != 1 { + t.Fatalf("Expected 1 step impact, got %d", len(impacts)) + } + + impact := impacts[0] + if impact.CompetitorsRemoved != 2 { + t.Errorf("Expected CompetitorsRemoved 2, got %d", impact.CompetitorsRemoved) + } + if !impact.PromotedToFirst { + t.Errorf("Expected PromotedToFirst to be true (host1 was not #1 before, became #1 after competitors removed)") + } + if impact.ScoreDelta != 0.0 { + t.Errorf("Expected ScoreDelta 0.0, got %f", impact.ScoreDelta) + } + }) + + t.Run("empty-inputs", func(t *testing.T) { + // Test with empty final scores + impacts := reconciler.calculateStepImpacts(map[string]float64{}, []v1alpha1.SchedulingDecisionPipelineOutputSpec{}, map[string]float64{}) + if len(impacts) != 0 { + t.Errorf("Expected 0 impacts for empty inputs, got %d", len(impacts)) + } + + // Test with no outputs + impacts = reconciler.calculateStepImpacts(map[string]float64{"host1": 1.0}, 
[]v1alpha1.SchedulingDecisionPipelineOutputSpec{}, map[string]float64{"host1": 1.0}) + if len(impacts) != 0 { + t.Errorf("Expected 0 impacts for no outputs, got %d", len(impacts)) + } + }) +} + +// TestLargeDatasetPerformance tests the controller with larger datasets +func TestLargeDatasetPerformance(t *testing.T) { + // Create a decision with many hosts + input := make(map[string]float64) + activations := make(map[string]float64) + + for i := 0; i < 100; i++ { + hostName := fmt.Sprintf("host%d", i) + input[hostName] = float64(i) + activations[hostName] = float64(i % 10) // Vary activations + } + + decision := NewTestDecision("large-decision"). + WithInput(input). + WithPipelineOutputs( + NewTestPipelineOutput("weigher1", activations), + NewTestPipelineOutput("weigher2", activations), + NewTestPipelineOutput("weigher3", activations), + ). + Build() + + resource := NewTestSchedulingDecision("test-large-dataset"). + WithDecisions(decision). + Build() + + fakeClient, _ := SetupTestEnvironment(t, resource) + req := CreateTestRequest("test-large-dataset") + + reconciler := CreateSchedulingReconciler(fakeClient) + + start := time.Now() + _, err := reconciler.Reconcile(t.Context(), req) + duration := time.Since(start) + + if err != nil { + t.Fatalf("Reconcile returned an error: %v", err) + } + + // Verify the result + updatedResource := AssertResourceExists(t, fakeClient, "test-large-dataset") + AssertResourceState(t, updatedResource, v1alpha1.SchedulingDecisionStateResolved) + AssertResultCount(t, updatedResource, 1) + + result := updatedResource.Status.Results[0] + if len(result.FinalScores) != 100 { + t.Errorf("Expected 100 final scores, got %d", len(result.FinalScores)) + } + + t.Logf("Large dataset test completed in %v with %d hosts", duration, len(result.FinalScores)) + + // Performance check - should complete within reasonable time + if duration > 5*time.Second { + t.Errorf("Large dataset processing took too long: %v", duration) + } +} diff --git 
a/decisions/internal/controller/test_helpers.go b/decisions/internal/controller/test_helpers.go new file mode 100644 index 00000000..c61ef73a --- /dev/null +++ b/decisions/internal/controller/test_helpers.go @@ -0,0 +1,325 @@ +// Copyright 2025 SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package controller + +import ( + "strings" + "testing" + "time" + + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + decisionsv1alpha1 "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1" +) + +// Test constants to reduce magic numbers +const ( + DefaultTestTTL = 2 * time.Hour + DefaultTestAge = 1 * time.Hour + OldTestAge = 3 * time.Hour + TestTolerance = 1 * time.Minute + DefaultTestVCPUs = 1 + DefaultTestRAM = 2048 + DefaultTestDisk = 10 +) + +// TestDecisionBuilder helps build SchedulingDecisionRequest objects for tests +type TestDecisionBuilder struct { + decision decisionsv1alpha1.SchedulingDecisionRequest +} + +func NewTestDecision(id string) *TestDecisionBuilder { + return &TestDecisionBuilder{ + decision: decisionsv1alpha1.SchedulingDecisionRequest{ + ID: id, + RequestedAt: metav1.NewTime(time.Now()), + EventType: decisionsv1alpha1.SchedulingEventTypeInitialPlacement, + Input: map[string]float64{ + "host1": 1.0, + }, + Pipeline: decisionsv1alpha1.SchedulingDecisionPipelineSpec{ + Name: "test-pipeline", + }, + Flavor: decisionsv1alpha1.Flavor{ + Name: "test-flavor", + Resources: map[string]resource.Quantity{ + "cpu": *resource.NewQuantity(int64(DefaultTestVCPUs), resource.DecimalSI), + "memory": *resource.NewQuantity(int64(DefaultTestRAM), resource.DecimalSI), + "storage": *resource.NewQuantity(int64(DefaultTestDisk), resource.DecimalSI), + }, + }, + }, + } +} + +// WithRequestedAt sets the RequestedAt timestamp +func (b *TestDecisionBuilder) 
WithRequestedAt(t time.Time) *TestDecisionBuilder { + b.decision.RequestedAt = metav1.NewTime(t) + return b +} + +// WithInput sets the input hosts and scores +func (b *TestDecisionBuilder) WithInput(input map[string]float64) *TestDecisionBuilder { + b.decision.Input = input + return b +} + +// WithPipelineOutputs sets the pipeline outputs +func (b *TestDecisionBuilder) WithPipelineOutputs(outputs ...decisionsv1alpha1.SchedulingDecisionPipelineOutputSpec) *TestDecisionBuilder { + b.decision.Pipeline.Outputs = outputs + return b +} + +// WithEventType sets the event type +func (b *TestDecisionBuilder) WithEventType(eventType decisionsv1alpha1.SchedulingEventType) *TestDecisionBuilder { + b.decision.EventType = eventType + return b +} + +// Build returns the built SchedulingDecisionRequest +func (b *TestDecisionBuilder) Build() decisionsv1alpha1.SchedulingDecisionRequest { + return b.decision +} + +// TestSchedulingDecisionBuilder helps build SchedulingDecision objects for tests +type TestSchedulingDecisionBuilder struct { + resource decisionsv1alpha1.SchedulingDecision +} + +// NewTestSchedulingDecision creates a new test SchedulingDecision builder +func NewTestSchedulingDecision(name string) *TestSchedulingDecisionBuilder { + return &TestSchedulingDecisionBuilder{ + resource: decisionsv1alpha1.SchedulingDecision{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + Spec: decisionsv1alpha1.SchedulingDecisionSpec{ + Decisions: []decisionsv1alpha1.SchedulingDecisionRequest{}, + }, + }, + } +} + +// WithDecisions sets the decisions of the SchedulingDecision, replacing any that were set before +func (b *TestSchedulingDecisionBuilder) WithDecisions(decisions ...decisionsv1alpha1.SchedulingDecisionRequest) *TestSchedulingDecisionBuilder { + b.resource.Spec.Decisions = decisions + return b +} + +// WithCreationTimestamp sets the creation timestamp +func (b *TestSchedulingDecisionBuilder) WithCreationTimestamp(t time.Time) *TestSchedulingDecisionBuilder { + b.resource.ObjectMeta.CreationTimestamp = 
metav1.NewTime(t) + return b +} + +// WithNamespace sets the namespace +func (b *TestSchedulingDecisionBuilder) WithNamespace(namespace string) *TestSchedulingDecisionBuilder { + b.resource.ObjectMeta.Namespace = namespace + return b +} + +// Build returns the built SchedulingDecision +func (b *TestSchedulingDecisionBuilder) Build() *decisionsv1alpha1.SchedulingDecision { + return &b.resource +} + +// NewTestPipelineOutput creates a pipeline output spec for testing +func NewTestPipelineOutput(step string, activations map[string]float64) decisionsv1alpha1.SchedulingDecisionPipelineOutputSpec { + return decisionsv1alpha1.SchedulingDecisionPipelineOutputSpec{ + Step: step, + Activations: activations, + } +} + +// SetupTestEnvironment creates a fake client and scheme for testing +func SetupTestEnvironment(t *testing.T, resources ...client.Object) (client.Client, *runtime.Scheme) { + t.Helper() + + scheme := runtime.NewScheme() + if err := decisionsv1alpha1.AddToScheme(scheme); err != nil { + t.Fatalf("Failed to add scheme: %v", err) + } + + clientBuilder := fake.NewClientBuilder().WithScheme(scheme) + if len(resources) > 0 { + clientBuilder = clientBuilder.WithObjects(resources...) 
+ } + + // Add status subresource for SchedulingDecision + fakeClient := clientBuilder.WithStatusSubresource(&decisionsv1alpha1.SchedulingDecision{}).Build() + + return fakeClient, scheme +} + +// CreateTestRequest creates a controller request for testing +func CreateTestRequest(name string, namespace ...string) ctrl.Request { + req := ctrl.Request{ + NamespacedName: client.ObjectKey{ + Name: name, + }, + } + if len(namespace) > 0 { + req.NamespacedName.Namespace = namespace[0] + } + return req +} + +// AssertResourceExists checks that a resource exists and returns it +func AssertResourceExists(t *testing.T, c client.Client, name string, namespace ...string) *decisionsv1alpha1.SchedulingDecision { + t.Helper() + + key := client.ObjectKey{Name: name} + if len(namespace) > 0 { + key.Namespace = namespace[0] + } + + var resource decisionsv1alpha1.SchedulingDecision + if err := c.Get(t.Context(), key, &resource); err != nil { + t.Fatalf("Resource %s should exist: %v", name, err) + } + return &resource +} + +// AssertResourceDeleted checks that a resource has been deleted +func AssertResourceDeleted(t *testing.T, c client.Client, name string, namespace ...string) { + t.Helper() + + key := client.ObjectKey{Name: name} + if len(namespace) > 0 { + key.Namespace = namespace[0] + } + + var resource decisionsv1alpha1.SchedulingDecision + err := c.Get(t.Context(), key, &resource) + if err == nil { + t.Errorf("Resource %s should have been deleted", name) + } +} + +// AssertResourceState checks the state of a SchedulingDecision +func AssertResourceState(t *testing.T, resource *decisionsv1alpha1.SchedulingDecision, expectedState decisionsv1alpha1.SchedulingDecisionState) { + t.Helper() + + if resource.Status.State != expectedState { + t.Errorf("Expected state '%s', got '%s'", expectedState, resource.Status.State) + } +} + +// AssertResourceError checks the error message of a SchedulingDecision +func AssertResourceError(t *testing.T, resource *decisionsv1alpha1.SchedulingDecision, 
expectedError string) { + t.Helper() + + if resource.Status.Error != expectedError { + t.Errorf("Expected error '%s', got '%s'", expectedError, resource.Status.Error) + } +} + +// AssertNoError checks that there's no error in the resource status +func AssertNoError(t *testing.T, resource *decisionsv1alpha1.SchedulingDecision) { + t.Helper() + + if resource.Status.Error != "" { + t.Errorf("Expected no error, got '%s'", resource.Status.Error) + } +} + +// AssertResultCount checks the number of results in a SchedulingDecision +func AssertResultCount(t *testing.T, resource *decisionsv1alpha1.SchedulingDecision, expectedCount int) { + t.Helper() + + if len(resource.Status.Results) != expectedCount { + t.Errorf("Expected %d results, got %d", expectedCount, len(resource.Status.Results)) + } +} + +// AssertDecisionCount checks the decision count in a SchedulingDecision +func AssertDecisionCount(t *testing.T, resource *decisionsv1alpha1.SchedulingDecision, expectedCount int) { + t.Helper() + + if resource.Status.DecisionCount != expectedCount { + t.Errorf("Expected decision count %d, got %d", expectedCount, resource.Status.DecisionCount) + } +} + +// AssertFinalScores checks the final scores in a result +func AssertFinalScores(t *testing.T, result decisionsv1alpha1.SchedulingDecisionResult, expectedScores map[string]float64) { + t.Helper() + + if len(result.FinalScores) != len(expectedScores) { + t.Errorf("Expected %d final scores, got %d", len(expectedScores), len(result.FinalScores)) + } + + for host, expectedScore := range expectedScores { + if actualScore, exists := result.FinalScores[host]; !exists { + t.Errorf("Expected final score for host '%s', but it was not found", host) + } else if actualScore != expectedScore { + t.Errorf("Expected final score for host '%s' to be %f, got %f", host, expectedScore, actualScore) + } + } +} + +// AssertDeletedHosts checks the deleted hosts in a result +func AssertDeletedHosts(t *testing.T, result 
decisionsv1alpha1.SchedulingDecisionResult, expectedDeletedHosts map[string][]string) { + t.Helper() + + if len(result.DeletedHosts) != len(expectedDeletedHosts) { + t.Errorf("Expected %d deleted hosts, got %d", len(expectedDeletedHosts), len(result.DeletedHosts)) + } + + for host, expectedSteps := range expectedDeletedHosts { + if actualSteps, exists := result.DeletedHosts[host]; !exists { + t.Errorf("Expected deleted host '%s', but it was not found", host) + } else if len(actualSteps) != len(expectedSteps) { + t.Errorf("Expected host '%s' to be deleted by %d steps, got %d", host, len(expectedSteps), len(actualSteps)) + } else { + for i, expectedStep := range expectedSteps { + if actualSteps[i] != expectedStep { + t.Errorf("Expected host '%s' step %d to be '%s', got '%s'", host, i, expectedStep, actualSteps[i]) + } + } + } + } +} + +// AssertDescriptionContains checks that a description contains expected text +func AssertDescriptionContains(t *testing.T, description string, expectedContents ...string) { + t.Helper() + + for _, expectedContent := range expectedContents { + if !strings.Contains(description, expectedContent) { + t.Errorf("Expected description to contain '%s', got '%s'", expectedContent, description) + } + } +} + +// CreateTTLReconciler creates a TTL reconciler with the given TTL duration +// If ttl truncates to zero whole seconds (or is negative), getTTL falls back to DefaultTTLAfterDecisionSeconds +func CreateTTLReconciler(fakeClient client.Client, scheme *runtime.Scheme, ttl time.Duration) *SchedulingDecisionTTLController { + ttlSeconds := int(ttl.Seconds()) + return &SchedulingDecisionTTLController{ + Client: fakeClient, + Scheme: scheme, + Conf: Config{ + TTLAfterDecisionSeconds: ttlSeconds, + }, + } +} + +// CreateSchedulingReconciler creates a scheduling decision reconciler +// If no conf is passed, the zero-value Config is used; only the first conf is read +func CreateSchedulingReconciler(fakeClient client.Client, conf ...Config) *SchedulingDecisionReconciler { + var config Config + if len(conf) > 0 { + config = conf[0] + 
} + return &SchedulingDecisionReconciler{ + Conf: config, + Client: fakeClient, + } +} diff --git a/decisions/internal/controller/ttl_controller.go b/decisions/internal/controller/ttl_controller.go new file mode 100644 index 00000000..db5affa2 --- /dev/null +++ b/decisions/internal/controller/ttl_controller.go @@ -0,0 +1,165 @@ +// Copyright 2025 SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package controller + +import ( + "context" + "time" + + "github.com/go-logr/logr" + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/predicate" + + decisionsv1alpha1 "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1" +) + +// TTLStartupReconciler handles startup reconciliation for existing resources +type TTLStartupReconciler struct { + ttlController *SchedulingDecisionTTLController +} + +// Start implements the Runnable interface and runs startup reconciliation +func (s *TTLStartupReconciler) Start(ctx context.Context) error { + log := logf.FromContext(ctx).WithName("ttl-startup-reconciler") + log.Info("Starting TTL startup reconciliation for existing resources") + + s.ttlController.reconcileAllResourcesOnStartup(ctx) + return nil +} + +// SchedulingDecisionTTLController handles automatic cleanup of resolved SchedulingDecision resources +// after a configurable TTL period. +type SchedulingDecisionTTLController struct { + // Client for the kubernetes API. + client.Client + // Kubernetes scheme to use for the decisions. + Scheme *runtime.Scheme + // Configuration for the TTL controller. 
+ Conf Config +} + +// +kubebuilder:rbac:groups=decisions.cortex,resources=schedulingdecisions,verbs=get;list;watch;delete +// +kubebuilder:rbac:groups=decisions.cortex,resources=schedulingdecisions/status,verbs=get + +func (r *SchedulingDecisionTTLController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + log := logf.FromContext(ctx).WithName("ttl-controller") + + // Fetch the decision object + var decision decisionsv1alpha1.SchedulingDecision + if err := r.Get(ctx, req.NamespacedName, &decision); err != nil { + // Resource was deleted or doesn't exist - nothing to clean up + return ctrl.Result{}, client.IgnoreNotFound(err) + } + + return r.processResourceForTTL(ctx, &decision, log) +} + +func (r *SchedulingDecisionTTLController) getTTL() time.Duration { + if r.Conf.TTLAfterDecisionSeconds > 0 { + return time.Duration(r.Conf.TTLAfterDecisionSeconds) * time.Second + } + return time.Duration(DefaultTTLAfterDecisionSeconds) * time.Second +} + +// processResourceForTTL handles the common TTL logic for a single resource +func (r *SchedulingDecisionTTLController) processResourceForTTL(ctx context.Context, decision *decisionsv1alpha1.SchedulingDecision, log logr.Logger) (ctrl.Result, error) { + // Calculate age based on last decision's RequestedAt timestamp + var referenceTime time.Time + if len(decision.Spec.Decisions) > 0 { + // Use the last decision's RequestedAt timestamp + lastDecision := decision.Spec.Decisions[len(decision.Spec.Decisions)-1] + referenceTime = lastDecision.RequestedAt.Time + } else { + // Fallback to creation timestamp if no decisions exist + referenceTime = decision.CreationTimestamp.Time + } + + age := time.Since(referenceTime) + ttl := r.getTTL() + + if age >= ttl { + // TTL has expired - delete the resource + log.Info("Deleting expired SchedulingDecision", + "name", decision.Name, + "age", age.String(), + "ttl", ttl.String()) + + if err := r.Delete(ctx, decision); err != nil { + if client.IgnoreNotFound(err) != nil { 
+ log.Error(err, "Failed to delete expired SchedulingDecision", "name", decision.Name) + return ctrl.Result{}, err + } + log.V(1).Info("SchedulingDecision was already deleted", "name", decision.Name) + } + + return ctrl.Result{}, nil + } + + remainingTime := ttl - age + log.V(1).Info("Scheduling SchedulingDecision for future deletion", + "name", decision.Name, + "remainingTime", remainingTime.String()) + + return ctrl.Result{RequeueAfter: remainingTime}, nil +} + +// reconcileAllResourcesOnStartup processes all existing SchedulingDecision resources +// to check for expired ones that should be cleaned up after controller restart +func (r *SchedulingDecisionTTLController) reconcileAllResourcesOnStartup(ctx context.Context) { + log := logf.FromContext(ctx).WithName("ttl-startup-reconciler") + + var resources decisionsv1alpha1.SchedulingDecisionList + if err := r.List(ctx, &resources); err != nil { + log.Error(err, "Failed to list SchedulingDecision resources during startup reconciliation") + return + } + + log.Info("Processing existing resources for TTL cleanup", "resourceCount", len(resources.Items)) + + processedCount := 0 + expiredCount := 0 + + for _, resource := range resources.Items { + // Use the shared TTL processing logic + result, err := r.processResourceForTTL(ctx, &resource, log) + if err != nil { + log.Error(err, "Failed to process resource during startup reconciliation", "name", resource.Name) + } else if result.RequeueAfter == 0 { + // Resource was deleted (no requeue means it was expired and deleted) + expiredCount++ + } + processedCount++ + } + + log.Info("Startup TTL reconciliation completed", + "processedResources", processedCount, + "expiredResources", expiredCount) +} + +func (r *SchedulingDecisionTTLController) SetupWithManager(mgr ctrl.Manager) error { + log := mgr.GetLogger().WithName("ttl-controller") + + log.Info("TTL Controller configured", "ttlAfterDecisionSeconds", r.getTTL().String()) + + // Add the startup reconciler as a runnable + if 
err := mgr.Add(&TTLStartupReconciler{ttlController: r}); err != nil { + return err + } + + return ctrl.NewControllerManagedBy(mgr). + For(&decisionsv1alpha1.SchedulingDecision{}). + Named("schedulingdecision-ttl"). + WithOptions(controller.Options{ + MaxConcurrentReconciles: 10, + }). + WithEventFilter( + // Watch for spec changes (when decisions are added/modified) + predicate.GenerationChangedPredicate{}, + ). + Complete(r) +} diff --git a/decisions/internal/controller/ttl_controller_test.go b/decisions/internal/controller/ttl_controller_test.go new file mode 100644 index 00000000..f4945625 --- /dev/null +++ b/decisions/internal/controller/ttl_controller_test.go @@ -0,0 +1,212 @@ +// Copyright 2025 SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package controller + +import ( + "context" + "testing" + "time" +) + +func TestTTLController(t *testing.T) { + tests := []struct { + name string + resourceAge time.Duration + ttl time.Duration + expectDeleted bool + expectRequeue bool + }{ + { + name: "young resource preserved", + resourceAge: DefaultTestAge, + ttl: DefaultTestTTL, + expectDeleted: false, + expectRequeue: true, + }, + { + name: "old resource deleted", + resourceAge: OldTestAge, + ttl: DefaultTestTTL, + expectDeleted: true, + expectRequeue: false, + }, + { + name: "resource at TTL boundary deleted", + resourceAge: DefaultTestTTL, + ttl: DefaultTestTTL, + expectDeleted: true, + expectRequeue: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create test resource with specified age + decision := NewTestDecision("decision-1"). + WithRequestedAt(time.Now().Add(-tt.resourceAge)). + Build() + + resource := NewTestSchedulingDecision("test-decision"). + WithDecisions(decision). 
+ Build() + + fakeClient, scheme := SetupTestEnvironment(t, resource) + reconciler := CreateTTLReconciler(fakeClient, scheme, tt.ttl) + req := CreateTestRequest("test-decision") + + result, err := reconciler.Reconcile(context.Background(), req) + if err != nil { + t.Fatalf("Reconcile failed: %v", err) + } + + // Check deletion expectation + if tt.expectDeleted { + AssertResourceDeleted(t, fakeClient, "test-decision") + } else { + AssertResourceExists(t, fakeClient, "test-decision") + } + + // Check requeue expectation + if tt.expectRequeue && result.RequeueAfter == 0 { + t.Error("Expected requeue but got none") + } + if !tt.expectRequeue && result.RequeueAfter != 0 { + t.Error("Expected no requeue but got one") + } + }) + } +} + +func TestTTLControllerFallbackToCreationTimestamp(t *testing.T) { + // Resource with no decisions should use creation timestamp + resource := NewTestSchedulingDecision("empty-decision"). + WithCreationTimestamp(time.Now().Add(-OldTestAge)). + Build() + + fakeClient, scheme := SetupTestEnvironment(t, resource) + reconciler := CreateTTLReconciler(fakeClient, scheme, DefaultTestTTL) + req := CreateTestRequest("empty-decision") + + result, err := reconciler.Reconcile(context.Background(), req) + if err != nil { + t.Fatalf("Reconcile failed: %v", err) + } + + // Should be deleted and not requeued + AssertResourceDeleted(t, fakeClient, "empty-decision") + if result.RequeueAfter != 0 { + t.Error("Expected no requeue after deletion") + } +} + +func TestTTLControllerDefaultTTL(t *testing.T) { + decision := NewTestDecision("decision-1"). + WithRequestedAt(time.Now().Add(-DefaultTestAge)). + Build() + + resource := NewTestSchedulingDecision("default-ttl-decision"). + WithDecisions(decision). 
+ Build() + + fakeClient, scheme := SetupTestEnvironment(t, resource) + + // Create reconciler without TTL config (should use default) + reconciler := CreateTTLReconciler(fakeClient, scheme, 0) // Zero duration means use default + + req := CreateTestRequest("default-ttl-decision") + result, err := reconciler.Reconcile(context.Background(), req) + if err != nil { + t.Fatalf("Reconcile failed: %v", err) + } + + // 1-hour-old resource with default TTL should be preserved + AssertResourceExists(t, fakeClient, "default-ttl-decision") + if result.RequeueAfter == 0 { + t.Error("Expected requeue for resource with default TTL") + } + + // Verify requeue time is reasonable + expectedRequeue := time.Duration(DefaultTTLAfterDecisionSeconds)*time.Second - DefaultTestAge + if result.RequeueAfter < expectedRequeue-TestTolerance || result.RequeueAfter > expectedRequeue+TestTolerance { + t.Errorf("Requeue time %v not within expected range %v ± %v", + result.RequeueAfter, expectedRequeue, TestTolerance) + } +} + +func TestTTLControllerNonExistentResource(t *testing.T) { + fakeClient, scheme := SetupTestEnvironment(t) + reconciler := CreateTTLReconciler(fakeClient, scheme, DefaultTestTTL) + req := CreateTestRequest("non-existent") + + result, err := reconciler.Reconcile(context.Background(), req) + if err != nil { + t.Fatalf("Should handle non-existent resources gracefully: %v", err) + } + + if result.RequeueAfter != 0 { + t.Error("Expected no requeue for non-existent resource") + } +} + +func TestTTLStartupReconciliation(t *testing.T) { + // Create resources with different ages + expiredDecision := NewTestDecision("expired-decision"). + WithRequestedAt(time.Now().Add(-OldTestAge)). + Build() + + youngDecision := NewTestDecision("young-decision"). + WithRequestedAt(time.Now().Add(-DefaultTestAge)). + Build() + + expiredResource := NewTestSchedulingDecision("expired-resource"). + WithDecisions(expiredDecision). 
+ Build() + + youngResource := NewTestSchedulingDecision("young-resource"). + WithDecisions(youngDecision). + Build() + + fakeClient, scheme := SetupTestEnvironment(t, expiredResource, youngResource) + reconciler := CreateTTLReconciler(fakeClient, scheme, DefaultTestTTL) + + // Run startup reconciliation + reconciler.reconcileAllResourcesOnStartup(context.Background()) + + // Verify expired resource was deleted + AssertResourceDeleted(t, fakeClient, "expired-resource") + + // Verify young resource still exists + AssertResourceExists(t, fakeClient, "young-resource") +} + +func TestTTLStartupReconcilerRunnable(t *testing.T) { + fakeClient, scheme := SetupTestEnvironment(t) + reconciler := CreateTTLReconciler(fakeClient, scheme, DefaultTestTTL) + + // Create the startup reconciler + startupReconciler := &TTLStartupReconciler{ttlController: reconciler} + + // Test the Start method + err := startupReconciler.Start(context.Background()) + if err != nil { + t.Fatalf("TTLStartupReconciler.Start() should not return error: %v", err) + } + + // The method should complete without error (no resources to process) + t.Log("TTLStartupReconciler.Start() completed successfully") +} + +func TestTTLStartupReconciliationErrorHandling(t *testing.T) { + // This test verifies that startup reconciliation handles errors gracefully + // We can't easily simulate List() failures with the fake client, but we can + // test that the method doesn't panic and handles empty results properly + + fakeClient, scheme := SetupTestEnvironment(t) // No resources + reconciler := CreateTTLReconciler(fakeClient, scheme, DefaultTestTTL) + + // This should complete without error even with no resources + reconciler.reconcileAllResourcesOnStartup(context.Background()) + + t.Log("Startup reconciliation handled empty resource list gracefully") +} diff --git a/go.mod b/go.mod index f8096cb7..5284cafc 100644 --- a/go.mod +++ b/go.mod @@ -4,11 +4,13 @@ go 1.25.0 replace ( github.com/cobaltcore-dev/cortex/commands => 
./commands + github.com/cobaltcore-dev/cortex/decisions/api => ./decisions/api github.com/cobaltcore-dev/cortex/reservations/api => ./reservations/api github.com/cobaltcore-dev/cortex/testlib => ./testlib ) require ( + github.com/cobaltcore-dev/cortex/decisions/api v0.0.0-00010101000000-000000000000 github.com/cobaltcore-dev/cortex/reservations/api v0.0.0-00010101000000-000000000000 github.com/dlmiddlecote/sqlstats v1.0.2 github.com/eclipse/paho.mqtt.golang v1.5.1 diff --git a/helm/library/cortex-core/templates/rbac.yaml b/helm/library/cortex-core/templates/rbac.yaml index 57903041..baca03ab 100644 --- a/helm/library/cortex-core/templates/rbac.yaml +++ b/helm/library/cortex-core/templates/rbac.yaml @@ -1,7 +1,7 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: - name: {{ .Release.Namespace }}-{{ include "cortex.fullname" . }} + name: {{ .Release.Namespace }}-{{ include "cortex.fullname" . }}-computereservation roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole @@ -12,6 +12,20 @@ subjects: name: {{ .Release.Namespace }}-{{ include "cortex.fullname" . }} namespace: {{ .Release.Namespace }} --- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + # Suffix names the bound resource kind, mirroring the -computereservation binding above. + name: {{ .Release.Namespace }}-{{ include "cortex.fullname" . }}-schedulingdecision +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + # From the decisions operator. + name: schedulingdecision-editor-role +subjects: +- kind: ServiceAccount + name: {{ .Release.Namespace }}-{{ include "cortex.fullname" . 
}} + namespace: {{ .Release.Namespace }} +--- apiVersion: v1 kind: ServiceAccount metadata: diff --git a/internal/scheduler/cinder/api/http/api_test.go b/internal/scheduler/cinder/api/http/api_test.go index dcf1b251..2c1e8cb2 100644 --- a/internal/scheduler/cinder/api/http/api_test.go +++ b/internal/scheduler/cinder/api/http/api_test.go @@ -21,6 +21,18 @@ type mockPipeline struct { runFunc func(api.ExternalSchedulerRequest) ([]string, error) } +func (p *mockPipeline) SetConsumer(consumer scheduler.SchedulingDecisionConsumer[api.ExternalSchedulerRequest]) { + +} + +func (p *mockPipeline) Consume( + request api.ExternalSchedulerRequest, + applicationOrder []string, + inWeights map[string]float64, + stepWeights map[string]map[string]float64, +) { +} + func (m *mockPipeline) Run(req api.ExternalSchedulerRequest) ([]string, error) { return m.runFunc(req) } diff --git a/internal/scheduler/manila/api/http/api_test.go b/internal/scheduler/manila/api/http/api_test.go index 6a839216..2c85c6b2 100644 --- a/internal/scheduler/manila/api/http/api_test.go +++ b/internal/scheduler/manila/api/http/api_test.go @@ -21,6 +21,18 @@ type mockPipeline struct { runFunc func(api.ExternalSchedulerRequest) ([]string, error) } +func (p *mockPipeline) SetConsumer(consumer scheduler.SchedulingDecisionConsumer[api.ExternalSchedulerRequest]) { + +} + +func (p *mockPipeline) Consume( + request api.ExternalSchedulerRequest, + applicationOrder []string, + inWeights map[string]float64, + stepWeights map[string]map[string]float64, +) { +} + func (m *mockPipeline) Run(req api.ExternalSchedulerRequest) ([]string, error) { return m.runFunc(req) } diff --git a/internal/scheduler/nova/api/http/api.go b/internal/scheduler/nova/api/http/api.go index 14b5b027..14986bdb 100644 --- a/internal/scheduler/nova/api/http/api.go +++ b/internal/scheduler/nova/api/http/api.go @@ -15,6 +15,7 @@ import ( "strings" "time" + "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1" 
"github.com/cobaltcore-dev/cortex/internal/conf" "github.com/cobaltcore-dev/cortex/internal/db" "github.com/cobaltcore-dev/cortex/internal/monitoring" @@ -26,6 +27,9 @@ import ( "github.com/majewsky/gg/option" "github.com/sapcc/go-api-declarations/liquid" "github.com/sapcc/go-bits/jobloop" + "sigs.k8s.io/controller-runtime/pkg/client" + + ctrl "sigs.k8s.io/controller-runtime" ) type HTTPAPI interface { @@ -40,6 +44,9 @@ type httpAPI struct { // Database connection to load specific objects during the scheduling process. DB db.DB + + // Kubernetes client + Client client.Client } func NewAPI(config conf.SchedulerConfig, registry *monitoring.Registry, db db.DB, mqttClient mqtt.Client) HTTPAPI { @@ -53,11 +60,26 @@ func NewAPI(config conf.SchedulerConfig, registry *monitoring.Registry, db db.DB pipelineConf, db, monitor.SubPipeline("nova-"+pipelineConf.Name), mqttClient, ) } + + scheme, err := v1alpha1.SchemeBuilder.Build() + if err != nil { + panic(err) + } + clientConfig, err := ctrl.GetConfig() + if err != nil { + panic(err) + } + cl, err := client.New(clientConfig, client.Options{Scheme: scheme}) + if err != nil { + panic(err) + } + return &httpAPI{ pipelines: pipelines, config: config, monitor: scheduler.NewSchedulerMonitor(registry), DB: db, + Client: cl, // TODO } } @@ -69,6 +91,7 @@ func (httpAPI *httpAPI) Init(mux *http.ServeMux) { } mux.HandleFunc("/scheduler/nova/external", httpAPI.NovaExternalScheduler) mux.HandleFunc("/scheduler/nova/commitments/change", httpAPI.HandleCommitmentChangeRequest) + mux.HandleFunc("/scheduler/nova/scheduling-decisions", httpAPI.HandleListSchedulingDecisions) } // Check if the scheduler can run based on the request data. @@ -408,3 +431,64 @@ func (httpAPI *httpAPI) HandleCommitmentChangeRequest(w http.ResponseWriter, r * } callback.Respond(http.StatusOK, nil, "") } + +// List all scheduling decisions. 
+func (httpAPI *httpAPI) HandleListSchedulingDecisions(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Access-Control-Allow-Origin", "http://localhost:4000") + // Only advertise the methods this handler accepts: everything except GET and the OPTIONS preflight is rejected with 405 below. + w.Header().Set("Access-Control-Allow-Methods", "GET, OPTIONS") + w.Header().Set("Access-Control-Allow-Headers", "Content-Type") + + // Handle preflight OPTIONS request + if r.Method == http.MethodOptions { + w.WriteHeader(http.StatusOK) + return + } + + callback := httpAPI.monitor.Callback(w, r, "/scheduler/nova/scheduling-decisions") + + // Exit early if the request method is not GET. + if r.Method != http.MethodGet { + internalErr := fmt.Errorf("invalid request method: %s", r.Method) + callback.Respond(http.StatusMethodNotAllowed, internalErr, "invalid request method") + return + } + + // Check if a specific vm id is requested. + vmID := r.URL.Query().Get("vm_id") + + // If no specific vm id is requested, list all scheduling decisions. + if vmID == "" { + var decisions v1alpha1.SchedulingDecisionList + if err := httpAPI.Client.List(r.Context(), &decisions); err != nil { + callback.Respond(http.StatusInternalServerError, err, "failed to list scheduling decisions") + return + } + w.Header().Set("Content-Type", "application/json") + + if err := json.NewEncoder(w).Encode(decisions); err != nil { + callback.Respond(http.StatusInternalServerError, err, "failed to encode response") + return + } + // Report the successful list request through the callback as well, the same way the single-decision path does after encoding. + callback.Respond(http.StatusOK, nil, "Success") + return + } + + // A vm id was given: the decision resource is looked up by that name. + var decision v1alpha1.SchedulingDecision + nn := client.ObjectKey{Name: vmID} + if err := httpAPI.Client.Get(r.Context(), nn, &decision); err != nil { + if client.IgnoreNotFound(err) != nil { + callback.Respond(http.StatusInternalServerError, err, "failed to get scheduling decision") + return + } + // Not found + callback.Respond(http.StatusNotFound, err, "scheduling decision not found") + return + } + + w.Header().Set("Content-Type", "application/json") + + if err := json.NewEncoder(w).Encode(decision); err != nil { + callback.Respond(http.StatusInternalServerError, err, "failed to 
encode response") + return + } + callback.Respond(http.StatusOK, nil, "Success") +} diff --git a/internal/scheduler/nova/api/http/api_test.go b/internal/scheduler/nova/api/http/api_test.go index 29c69688..5f0f879b 100644 --- a/internal/scheduler/nova/api/http/api_test.go +++ b/internal/scheduler/nova/api/http/api_test.go @@ -30,6 +30,19 @@ func (m *mockExternalSchedulerPipeline) Run(request api.ExternalSchedulerRequest return []string{"host1"}, nil } +func (m *mockExternalSchedulerPipeline) SetConsumer(consumer scheduler.SchedulingDecisionConsumer[api.ExternalSchedulerRequest]) { + // Do nothing +} + +func (m *mockExternalSchedulerPipeline) Consume( + request api.ExternalSchedulerRequest, + applicationOrder []string, + inWeights map[string]float64, + stepWeights map[string]map[string]float64, +) { + // Do nothing +} + func TestCanRunScheduler(t *testing.T) { httpAPI := &httpAPI{ pipelines: map[string]scheduler.Pipeline[api.ExternalSchedulerRequest]{ @@ -255,6 +268,18 @@ type mockCommitmentsPipeline struct { shouldError bool } +func (p *mockCommitmentsPipeline) SetConsumer(consumer scheduler.SchedulingDecisionConsumer[api.ExternalSchedulerRequest]) { + +} + +func (p *mockCommitmentsPipeline) Consume( + request api.ExternalSchedulerRequest, + applicationOrder []string, + inWeights map[string]float64, + stepWeights map[string]map[string]float64, +) { +} + func (p *mockCommitmentsPipeline) Run(request api.ExternalSchedulerRequest) ([]string, error) { if p.shouldError { return nil, errors.New("mock error") diff --git a/internal/scheduler/nova/pipeline.go b/internal/scheduler/nova/pipeline.go index b9f1f2d5..ce3b1b2c 100644 --- a/internal/scheduler/nova/pipeline.go +++ b/internal/scheduler/nova/pipeline.go @@ -4,9 +4,12 @@ package nova import ( + "context" "errors" "log/slog" + "math" + "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1" "github.com/cobaltcore-dev/cortex/internal/conf" "github.com/cobaltcore-dev/cortex/internal/db" 
"github.com/cobaltcore-dev/cortex/internal/mqtt" @@ -16,6 +19,10 @@ import ( "github.com/cobaltcore-dev/cortex/internal/scheduler/nova/plugins/shared" "github.com/cobaltcore-dev/cortex/internal/scheduler/nova/plugins/vmware" "github.com/cobaltcore-dev/cortex/internal/sync/openstack/nova" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" ) type NovaStep = scheduler.Step[api.ExternalSchedulerRequest] @@ -59,6 +66,149 @@ type novaPipeline struct { preselectAllHosts bool } +type novaPipelineConsumer struct { + // Kubernetes client to create decision resources. + Client client.Client +} + +func NewNovaPipelineConsumer() *novaPipelineConsumer { + var kubernetesClient client.Client + if scheme, err := v1alpha1.SchemeBuilder.Build(); err == nil { + if clientConfig, err := ctrl.GetConfig(); err == nil { + if cl, err := client.New(clientConfig, client.Options{Scheme: scheme}); err == nil { + // Successfully created a client, use it. + kubernetesClient = cl + } + } + } + return &novaPipelineConsumer{ + Client: kubernetesClient, + } +} + +func (c *novaPipelineConsumer) Consume( + request api.ExternalSchedulerRequest, + applicationOrder []string, + inWeights map[string]float64, + stepWeights map[string]map[string]float64, +) { + + if c.Client == nil { + return + } + + // Determine the event type based on request flags + var eventType v1alpha1.SchedulingEventType + switch { + case request.Live: + eventType = v1alpha1.SchedulingEventTypeLiveMigration + case request.Resize: + eventType = v1alpha1.SchedulingEventTypeResize + default: + eventType = v1alpha1.SchedulingEventTypeInitialPlacement + } + + outputs := []v1alpha1.SchedulingDecisionPipelineOutputSpec{} + for _, stepKey := range applicationOrder { + weights, ok := stepWeights[stepKey] + if !ok { + // This is ok, since steps can be skipped. 
+ continue + } + activations := make(map[string]float64, len(weights)) + for k, v := range weights { + activations[k] = math.Tanh(v) + } + outputs = append(outputs, v1alpha1.SchedulingDecisionPipelineOutputSpec{ + Step: stepKey, + Activations: activations, + }) + } + + // Initialize default values for resource calculation + var vcpus, ram, disk int + var flavorName string + var resources map[string]resource.Quantity + + if request.Spec.Data.Flavor.Data.Name == "" { + slog.Warn("scheduler: Flavor data is missing, using zero values for resources", "instanceUUID", request.Spec.Data.InstanceUUID) + // Use zero values for resources + resources = map[string]resource.Quantity{ + "cpu": *resource.NewQuantity(0, resource.DecimalSI), + "memory": *resource.NewQuantity(0, resource.DecimalSI), + "storage": *resource.NewQuantity(0, resource.DecimalSI), + } + flavorName = "unknown" + } else { + flavor := request.Spec.Data.Flavor + flavorName = flavor.Data.Name + + vcpus = int(math.Min(float64(flavor.Data.VCPUs), math.MaxInt)) + ram = int(math.Min(float64(flavor.Data.MemoryMB), math.MaxInt)) + disk = int(math.Min(float64(flavor.Data.RootGB), math.MaxInt)) + + resources = map[string]resource.Quantity{ + "cpu": *resource.NewQuantity(int64(vcpus), resource.DecimalSI), + "memory": *resource.NewQuantity(int64(ram), resource.DecimalSI), + "storage": *resource.NewQuantity(int64(disk), resource.DecimalSI), + } + } + + if request.VMware { + resources["hypervisor.vmware"] = *resource.NewQuantity(1, resource.DecimalSI) + resources["hypervisor.kvm"] = *resource.NewQuantity(0, resource.DecimalSI) + } else { + resources["hypervisor.vmware"] = *resource.NewQuantity(0, resource.DecimalSI) + resources["hypervisor.kvm"] = *resource.NewQuantity(1, resource.DecimalSI) + } + + decisionRequest := v1alpha1.SchedulingDecisionRequest{ + ID: request.Context.RequestID, + RequestedAt: metav1.Now(), + EventType: eventType, + Input: inWeights, + Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ + Name: 
request.GetPipeline(), + Outputs: outputs, + }, + AvailabilityZone: request.Spec.Data.AvailabilityZone, + Flavor: v1alpha1.Flavor{ + Name: flavorName, + Resources: resources, + }, + } + + objectKey := client.ObjectKey{Name: request.Spec.Data.InstanceUUID} + + // Try to update existing decision first + var existing v1alpha1.SchedulingDecision + if err := c.Client.Get(context.Background(), objectKey, &existing); err == nil { + // Decision already exists, append the new decision to the existing ones + existing.Spec.Decisions = append(existing.Spec.Decisions, decisionRequest) + + if err := c.Client.Update(context.Background(), &existing); err != nil { + slog.Error("scheduler: failed to update existing decision", "error", err, "resourceID", request.Spec.Data.InstanceUUID) + return + } + slog.Info("scheduler: appended decision to existing resource", "resourceID", request.Spec.Data.InstanceUUID, "eventType", eventType) + return + } else if client.IgnoreNotFound(err) != nil { + // Only a NotFound error means the resource is missing. Any other Get failure + // is logged and the decision is dropped instead of falling through to Create. + slog.Error("scheduler: failed to get existing decision", "error", err, "resourceID", request.Spec.Data.InstanceUUID) + return + } + + // Decision doesn't exist, create a new one + decision := &v1alpha1.SchedulingDecision{ + ObjectMeta: ctrl.ObjectMeta{Name: request.Spec.Data.InstanceUUID}, + Spec: v1alpha1.SchedulingDecisionSpec{ + Decisions: []v1alpha1.SchedulingDecisionRequest{decisionRequest}, + }, + // Status will be filled in by the controller. + } + if err := c.Client.Create(context.Background(), decision); err != nil { + slog.Error("scheduler: failed to create decision", "error", err, "resourceID", request.Spec.Data.InstanceUUID) + return + } + slog.Info("scheduler: created new decision", "resourceID", request.Spec.Data.InstanceUUID, "eventType", eventType) +} + // Create a new Nova scheduler pipeline. 
func NewPipeline( config conf.NovaSchedulerPipelineConfig, @@ -89,7 +239,9 @@ func NewPipeline( supportedSteps, config.Plugins, wrappers, db, monitor, mqttClient, TopicFinished, ) - return &novaPipeline{pipeline, db, config.PreselectAllHosts} + wrapped := &novaPipeline{pipeline, db, config.PreselectAllHosts} + wrapped.SetConsumer(NewNovaPipelineConsumer()) + return wrapped } // If needed, modify the request before sending it off to the pipeline. diff --git a/internal/scheduler/nova/pipeline_test.go b/internal/scheduler/nova/pipeline_test.go index cbd6e5a0..50aa152d 100644 --- a/internal/scheduler/nova/pipeline_test.go +++ b/internal/scheduler/nova/pipeline_test.go @@ -330,3 +330,27 @@ func TestPremodifier_ModifyRequest_PreservesOtherFields(t *testing.T) { t.Error("original host weight should have been replaced") } } + +// Test that the consumer handles missing flavor data correctly +func TestConsumerMissingFlavorData(t *testing.T) { + consumer := &novaPipelineConsumer{Client: nil} + + request := api.ExternalSchedulerRequest{ + Context: api.NovaRequestContext{ + RequestID: "test-request-id", + }, + Spec: api.NovaObject[api.NovaSpec]{ + Data: api.NovaSpec{ + InstanceUUID: "test-uuid", + Flavor: api.NovaObject[api.NovaFlavor]{ + Data: api.NovaFlavor{ + Name: "", // Empty flavor name triggers missing data handling + }, + }, + }, + }, + } + + // Should handle missing flavor data without panic and use fallback values + consumer.Consume(request, []string{}, map[string]float64{}, map[string]map[string]float64{}) +} diff --git a/internal/scheduler/pipeline.go b/internal/scheduler/pipeline.go index 1b0ac0cd..24b97b9e 100644 --- a/internal/scheduler/pipeline.go +++ b/internal/scheduler/pipeline.go @@ -21,6 +21,9 @@ import ( type Pipeline[RequestType PipelineRequest] interface { // Run the scheduling pipeline with the given request. Run(request RequestType) ([]string, error) + + // Set the consumer that will receive the decisions. 
+ SetConsumer(consumer SchedulingDecisionConsumer[RequestType]) } type Premodifier[RequestType PipelineRequest] interface { @@ -43,6 +46,13 @@ type pipeline[RequestType PipelineRequest] struct { mqttClient mqtt.Client // MQTT topic to publish telemetry data on when the pipeline is finished. mqttTopic string + + // Optional consumer to listen for the decisions. + Consumer SchedulingDecisionConsumer[RequestType] +} + +func (p *pipeline[RequestType]) SetConsumer(consumer SchedulingDecisionConsumer[RequestType]) { + p.Consumer = consumer } type StepWrapper[RequestType PipelineRequest] func(Step[RequestType], conf.SchedulerStepConfig) Step[RequestType] @@ -193,6 +203,15 @@ type TelemetryMessage[RequestType PipelineRequest] struct { Out map[string]float64 `json:"out"` } +type SchedulingDecisionConsumer[RequestType PipelineRequest] interface { + Consume( + request RequestType, + applicationOrder []string, + inWeights map[string]float64, + stepWeights map[string]map[string]float64, + ) +} + // Evaluate the pipeline and return a list of subjects in order of preference. 
func (p *pipeline[RequestType]) Run(request RequestType) ([]string, error) { slogArgs := request.GetTraceLogArgs() @@ -232,5 +251,9 @@ func (p *pipeline[RequestType]) Run(request RequestType) ([]string, error) { Out: outWeights, }) + if p.Consumer != nil { + // Consume runs on its own goroutine, so Run returns without waiting for it. + go p.Consumer.Consume(request, p.applicationOrder, inWeights, stepWeights) + } + return subjects, nil } diff --git a/internal/scheduler/pipeline_test.go b/internal/scheduler/pipeline_test.go index b84a0c4c..734b8806 100644 --- a/internal/scheduler/pipeline_test.go +++ b/internal/scheduler/pipeline_test.go @@ -11,6 +11,7 @@ import ( "github.com/cobaltcore-dev/cortex/internal/conf" "github.com/cobaltcore-dev/cortex/internal/db" "github.com/cobaltcore-dev/cortex/testlib/mqtt" + "k8s.io/client-go/rest" ) type mockPipelineStep struct { @@ -227,6 +228,11 @@ func TestNewPipeline(t *testing.T) { database := db.DB{} // Mock or initialize as needed monitor := PipelineMonitor{} // Replace with an actual mock implementation if available mqttClient := &mqtt.MockClient{} + + // NOTE(review): this empty rest.Config is created and immediately discarded below; + // the visible code never passes it to anything, so it does not set up a kubeconfig - confirm whether it can be removed. + restConfig := &rest.Config{} + _ = restConfig + supportedSteps := map[string]func() Step[mockPipelineRequest]{ "mock_pipeline_step": func() Step[mockPipelineRequest] { return &mockPipelineStep{ diff --git a/visualizer/Dockerfile b/visualizer/Dockerfile index 5ab7a8ee..c83db471 100644 --- a/visualizer/Dockerfile +++ b/visualizer/Dockerfile @@ -6,3 +6,4 @@ COPY vendor/mqtt.min.js /usr/share/nginx/html/mqtt.min.js COPY nova.html /usr/share/nginx/html/nova.html COPY manila.html /usr/share/nginx/html/manila.html COPY shared.css /usr/share/nginx/html/shared.css +COPY favicon.ico /usr/share/nginx/html/favicon.ico diff --git a/visualizer/favicon.ico b/visualizer/favicon.ico new file mode 100644 index 00000000..b4f9d5fb Binary files /dev/null and b/visualizer/favicon.ico differ diff --git a/visualizer/manila.html b/visualizer/manila.html index 91eed41f..26786547 100644 --- a/visualizer/manila.html +++ 
b/visualizer/manila.html @@ -3,194 +3,198 @@ - - Cortex Manila Visualizer - - - - - - - - - - - -
- -
-
Waiting for mqtt data to arrive...
-
- -
- - - -
- - + + +
+ +
+
Waiting for mqtt data to arrive...
+
+ +
+ + + +
+ + - + } + + + \ No newline at end of file diff --git a/visualizer/nova.html b/visualizer/nova.html index 7f22ca58..0fbef361 100644 --- a/visualizer/nova.html +++ b/visualizer/nova.html @@ -1,15 +1,11 @@ - - - Cortex Nova Visualizer + - - + + + +
+
+ +
Cortex Nova Visualizer
+
- - + -
- - - -
+
+
+
+
Loading...
+ +
+
+
+ + \ No newline at end of file diff --git a/visualizer/shared.css b/visualizer/shared.css index c04e4a45..3c4f80f2 100644 --- a/visualizer/shared.css +++ b/visualizer/shared.css @@ -24,60 +24,72 @@ body { /* Nice animated progress bar on top of the page. */ .progress { position: fixed; - top: 0; left: 0; right: 0; + top: 0; + left: 0; + right: 0; height: 0.5em; background: var(--color-primary); z-index: 1000; } + .progress::before { content: ''; position: absolute; - top: 0; left: 0; right: 0; + top: 0; + left: 0; + right: 0; height: 0.5em; background: var(--color-secondary); animation: progress 2s infinite; } + @keyframes progress { - 0% { left: -100%; right: 100%; } - 100% { left: 100%; right: -100%; } + 0% { + left: -100%; + right: 100%; + } + + 100% { + left: 100%; + right: -100%; + } } + .progress-text { position: fixed; - top: 2em; left: 0; right: 0; + top: 2em; + left: 0; + right: 0; text-align: center; font-weight: bold; } /* Navbar that shows information. */ nav { - position: fixed; - top: 0; left: 0; right: 0; padding-left: 0.25em; background: var(--color-surface); box-shadow: 0 0 1em rgba(0, 0, 0, 0.1); z-index: 1; } + nav div.element { display: inline-block; - padding-top: 1em; padding-bottom: 2em; - padding-left: 1em; padding-right: 1em; + padding-top: 1em; + padding-bottom: 2em; + padding-left: 1em; + padding-right: 1em; margin: 0; background: var(--color-surface); color: var(--color-on-surface); border-right: 2px solid var(--color-background); font-size: 1em; } + nav div.element p.highlight { font-size: 1.25em; font-weight: bold; } -main { - /* Space for the description */ - padding-top: 9em; - padding-left: 0.5em; -} - table { /* Revert the default spacing used by the browser. 
*/ border-spacing: 0; @@ -89,25 +101,39 @@ td.weight { position: relative; animation: weightAnimation 0.25s ease-in-out; } + td.weight div { border-radius: 0.5em; padding: 0.5em; margin: 0.5em; border: 2px solid var(--color-surface); } + /* Backdrop white for the weight cells */ td.weight::after { content: ''; position: absolute; - --m: 0.6em; top: var(--m); bottom: var(--m); left: var(--m); right: var(--m); + --m: 0.6em; + top: var(--m); + bottom: var(--m); + left: var(--m); + right: var(--m); border-radius: 0.5em; background: var(--color-surface); z-index: -1; } + /* Animation for weights when they first appear */ @keyframes weightAnimation { - 0% { opacity: 0; transform: scale(0.5); } - 100% { opacity: 1; transform: scale(1); } + 0% { + opacity: 0; + transform: scale(0.5); + } + + 100% { + opacity: 1; + transform: scale(1); + } } /* Table cell showing the hostname/name. */ @@ -115,6 +141,7 @@ th.hostname { text-align: center; position: relative; } + th.hostname div { position: relative; padding: 0.1em; @@ -131,10 +158,12 @@ th.metainfo { text-align: center; position: relative; } + th.metainfo div p { width: 6em; overflow: hidden; } + th.metainfo div p.issue { color: var(--color-tertiary); border-radius: 0.5em; @@ -162,12 +191,16 @@ td.chart { position: relative; height: 24em; } + td.chart div.barsbefore, td.chart div.barsafter, td.chart div.backdrop, td.chart div.stats { position: absolute; - top: 0; left: 0; right: 0; bottom: 0; + top: 0; + left: 0; + right: 0; + bottom: 0; display: flex; margin-top: 1.5em; margin-bottom: 0.5em; @@ -177,6 +210,7 @@ td.chart div.stats { justify-content: center; align-items: flex-end; } + td.chart div.barsbefore p, td.chart div.barsafter p, td.chart div.backdrop p, @@ -186,44 +220,73 @@ td.chart div.stats p { display: flex; border-radius: 0.2em; } + td.chart div.backdrop p { height: 100%; border-radius: 0.2em; border: 1px solid rgba(0, 0, 0, 0.05); background: white; } + td.chart div.stats { text-align: center; display: flex; 
justify-content: center; align-items: flex-start; } + td.chart div.stats p { writing-mode: vertical-lr; text-orientation: mixed; display: flex; font-size: 1em; font-weight: bold; - margin-left: 0.1em; margin-right: 0.1em; + margin-left: 0.1em; + margin-right: 0.1em; justify-content: center; align-items: center; } + /* Animation for chart bars */ td.chart div.barsafter p, td.chart div.barsbefore p { animation: barAnim 0.25s ease-in-out; overflow: hidden; } + @keyframes barAnim { - 0% { transform: scaleY(0); } - 100% { transform: scaleY(1); } + 0% { + transform: scaleY(0); + } + + 100% { + transform: scaleY(1); + } +} + +td.chart div.barsafter p.cpu { + background: var(--color-primary); +} + +td.chart div.barsafter p.mem { + background: var(--color-primary); +} + +td.chart div.barsafter p.disk { + background: var(--color-primary); +} + +td.chart div.barsbefore p.cpu { + background: var(--color-secondary); +} + +td.chart div.barsbefore p.mem { + background: var(--color-secondary); +} + +td.chart div.barsbefore p.disk { + background: var(--color-secondary); } -td.chart div.barsafter p.cpu { background: var(--color-primary); } -td.chart div.barsafter p.mem { background: var(--color-primary); } -td.chart div.barsafter p.disk { background: var(--color-primary); } -td.chart div.barsbefore p.cpu { background: var(--color-secondary); } -td.chart div.barsbefore p.mem { background: var(--color-secondary); } -td.chart div.barsbefore p.disk { background: var(--color-secondary); } /* Style for the input and button */ @@ -233,6 +296,7 @@ td.chart div.barsbefore p.disk { background: var(--color-secondary); } align-items: center; margin: 1em; } + .mqtt-url-input input { padding: 0.5em; font-size: 1em; @@ -241,6 +305,7 @@ td.chart div.barsbefore p.disk { background: var(--color-secondary); } margin-right: 0.5em; min-width: 12em; } + .mqtt-url-input button { padding: 0.5em 1em; margin-right: 0.5em; @@ -251,7 +316,8 @@ td.chart div.barsbefore p.disk { background: 
var(--color-secondary); } border-radius: 0.25em; cursor: pointer; } + .mqtt-url-input button:hover { background: var(--color-secondary); color: var(--color-on-secondary); -} +} \ No newline at end of file