Skip to content

Commit

Permalink
feat: use cosi-bucket-kit chart
Browse files Browse the repository at this point in the history
Signed-off-by: Tarun Gupta Akirala <[email protected]>
  • Loading branch information
takirala committed Dec 21, 2024
1 parent 7356635 commit cb540e7
Show file tree
Hide file tree
Showing 8 changed files with 225 additions and 187 deletions.
46 changes: 46 additions & 0 deletions services/centralized-kubecost/2.5.0/cosi-storage/cosi-bucket.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
---
# Helm repository referenced by the kubecost-cosi-storage HelmRelease in this
# file as the source of the cosi-bucket-kit chart.
# TODO: delete after merging the cosi-bucket-kit chart
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: takirala
  namespace: kommander-flux
spec:
  # How often Flux re-fetches the repository index.
  interval: 10m
  # Upper bound for a single index fetch.
  timeout: 1m
  url: https://takirala.github.io/charts/stable
---
# Installs the cosi-bucket-kit chart as the "kubecost-cosi-storage" release.
# NOTE(review): ${releaseNamespace} is a placeholder; presumably filled in by
# Flux post-build variable substitution - confirm against the Kustomization
# that applies this file.
apiVersion: helm.toolkit.fluxcd.io/v2beta2
kind: HelmRelease
metadata:
  name: kubecost-cosi-storage
  namespace: ${releaseNamespace}
spec:
  chart:
    spec:
      chart: cosi-bucket-kit
      sourceRef:
        kind: HelmRepository
        name: takirala
        # Intended long-term source (currently disabled):
        # name: mesosphere.github.io-charts-stable
        namespace: kommander-flux
      version: 0.0.1-alpha.0
  # Reconciliation interval for this release.
  interval: 15s
  install:
    # Create missing CRDs and replace existing ones on install.
    crds: CreateReplace
    remediation:
      retries: 30
    createNamespace: true
  upgrade:
    # Same CRD policy and retry budget as install.
    crds: CreateReplace
    remediation:
      retries: 30
  releaseName: kubecost-cosi-storage
  targetNamespace: ${releaseNamespace}
  # Values sources are listed defaults first, then the optional overrides
  # ConfigMap; the release does not require the overrides to exist.
  valuesFrom:
    - kind: ConfigMap
      name: centralized-kubecost-2.5.0-d2iq-defaults
    - kind: ConfigMap
      name: centralized-kubecost-overrides
      optional: true
---
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- move-to-rook-ceph-cluster-driver.yaml
- todo-create-a-new-chart-in-mesosphere_charts_stable.yaml
- cosi-bucket.yaml

This file was deleted.

This file was deleted.

170 changes: 54 additions & 116 deletions services/centralized-kubecost/2.5.0/defaults/cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ data:
# kubecostAggregator.deployMethod:
# kA.dM = "singlepod" -> cloudCost is run as container inside cost-analyzer
# kA.dM = "statefulset" -> cloudCost is run as single-replica Deployment
enabled: false # TODO: document how to enable here
enabled: false
# Log level for the aggregator container. Options are "trace", "debug", "info", "warn", "error", "fatal", "panic"
logLevel: info
resources:
Expand Down Expand Up @@ -66,7 +66,6 @@ data:
forecasting:
# Enable this to use kubecost's cost forecasting model
# TODO(takirala): do we enable this and create yet another pod or disable this but ship the image for airgap bundle?
enabled: false
# Define persistence volume for cost-analyzer, more information at https://github.com/kubecost/docs/blob/master/storage.md
Expand Down Expand Up @@ -95,129 +94,22 @@ data:
enabled: false
prometheus:
kube-state-metrics:
fullnameOverride: "kommander-kubecost-prometheus-kube-state-metrics"
priorityClassName: dkp-high-priority
extraScrapeConfigs: |
- job_name: kubecost
honor_labels: true
scrape_interval: 1m
scrape_timeout: 10s
metrics_path: /metrics
scheme: http
dns_sd_configs:
- names:
- {{ .Release.Name }}-cost-analyzer
type: 'A'
port: 9003
- job_name: kubecost-networking
kubernetes_sd_configs:
- role: pod
relabel_configs:
# Scrape only the targets matching the following metadata
- source_labels: [__meta_kubernetes_pod_label_app]
action: keep
regex: {{ .Release.Name }}-network-costs
fullnameOverride: "kommander-kubecost-prometheus"
server:
fullnameOverride: "kommander-kubecost-prometheus-server"
image:
repository: quay.io/prometheus/prometheus
tag: v2.55.0
# If clusterIDConfigmap is defined, instead use user-generated configmap with key CLUSTER_ID
# to use as unique cluster ID in kubecost cost-analyzer deployment.
# This overrides the cluster_id set in prometheus.server.global.external_labels.
# NOTE: This does not affect the external_labels set in prometheus config.
clusterIDConfigmap: kubecost-cluster-info-configmap
extraFlags:
- web.enable-admin-api
- web.enable-lifecycle
- storage.tsdb.wal-compression
resources:
limits:
cpu: 1000m
memory: 2500Mi
requests:
cpu: 300m
memory: 1500Mi
priorityClassName: dkp-high-priority
global:
scrape_interval: 1m
scrape_timeout: 10s
evaluation_interval: 1m
external_labels:
cluster_id: $CLUSTER_ID
persistentVolume:
size: 32Gi
enabled: true
extraArgs:
log.level: info
log.format: json
storage.tsdb.min-block-duration: 2h
storage.tsdb.max-block-duration: 2h
query.max-concurrency: 1
query.max-samples: 100000000
enableAdminApi: true
service:
gRPC:
enabled: true
priorityClassName: dkp-high-priority
configmapReload:
prometheus:
enabled: true
#image:
#repository: ghcr.io/jimmidyson/configmap-reload
#tag: v0.14.0
alertmanager:
enabled: true
#image:
#repository: ghcr.io/jimmidyson/configmap-reload
#tag: v0.14.0
alertmanager:
fullnameOverride: "kommander-kubecost-prometheus-alertmanager"
priorityClassName: dkp-high-priority
enabled: true
image:
repository: quay.io/prometheus/alertmanager
tag: v0.27.0
resources:
limits:
cpu: 50m
memory: 100Mi
requests:
cpu: 10m
memory: 50Mi
persistentVolume:
enabled: true
pushgateway:
enabled: false
persistentVolume:
enabled: false
serverFiles:
alerts:
groups:
- name: Kubecost
rules:
- alert: kubecostDown
expr: up{job="kubecost"} == 0
annotations:
message: 'Kubecost metrics endpoint is not being scraped successfully.'
for: 10m
labels:
severity: warning
- alert: kubecostMetricsUnavailable
expr: sum(sum_over_time(node_cpu_hourly_cost[5m])) == 0
annotations:
message: 'Kubecost metrics are not available in Prometheus.'
for: 10m
labels:
severity: warning
- alert: kubecostRecordingRulesNotEvaluated
expr: avg_over_time(kubecost_cluster_memory_working_set_bytes[5m]) == 0
annotations:
message: 'Kubecost recording rules are not being successfully evaluated.'
for: 10m
labels:
severity: warning
kube-state-metrics:
fullnameOverride: "kommander-kubecost-prometheus-kube-state-metrics"
priorityClassName: dkp-high-priority
grafana:
sidecar:
Expand All @@ -234,10 +126,56 @@ data:
kubecostProductConfigs:
grafanaURL: "/dkp/kommander/monitoring/grafana"
# used for display in Kubecost UI
clusterName: ""
clusterProfile: production
cloudIntegrationSecret: # TODO(takirala): Do we want to enable this by default?
cloudIntegrationSecret: ""
productKey:
enabled: false
#key: YOUR_KEY
# COSI related resources
bucketClasses: # Cluster scoped resource
- name: kubecost-cosi-storage
driverName: rook-ceph.ceph.objectstorage.k8s.io
deletionPolicy: Delete
parameters:
objectStoreUserSecretName: rook-ceph-object-user-dkp-object-store-cosi-admin
objectStoreUserSecretNamespace: kommander
bucketAccessClasses: # Cluster scoped resource
- name: kubecost-cosi-storage
driverName: rook-ceph.ceph.objectstorage.k8s.io
authenticationType: KEY
parameters:
objectStoreUserSecretName: rook-ceph-object-user-dkp-object-store-cosi-admin
objectStoreUserSecretNamespace: kommander
bucketClaims: # Namespace scoped resource
- name: kubecost-cosi-storage
namespace: kubecost
bucketClassName: kubecost-cosi-storage
protocols:
- s3
bucketAccesses: # Namespace scoped resource
- name: kubecost-cosi-storage
namespace: kubecost
bucketAccessClassName: kubecost-cosi-storage
bucketClaimName: kubecost-cosi-storage
protocol: s3
credentialsSecretName: federated-store
cosiProviders:
ceph:
driver:
enabled: true
name: ceph-cosi-driver
namespace: kommander
spec:
deploymentStrategy: Auto
adminuser:
enabled: true
name: cosi-admin
namespace: kommander
spec:
displayName: "ceph cosi admin"
store: dkp-object-store # name of the CephObjectStore
capabilities:
bucket: "*"
user: "*"
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# TODO: https://jira.nutanix.com/browse/NCN-104793 and https://jira.nutanix.com/browse/NCN-104743
apiVersion: helm.toolkit.fluxcd.io/v2beta2
kind: HelmRelease
metadata:
Expand All @@ -9,9 +10,10 @@ spec:
chart: cosi
sourceRef:
kind: HelmRepository
name: mesosphere.github.io-charts-stable
name: takirala
# name: mesosphere.github.io-charts-stable
namespace: kommander-flux
version: 0.0.1-alpha.1
version: 0.0.1-alpha.2
interval: 15s
install:
crds: CreateReplace
Expand Down
3 changes: 3 additions & 0 deletions services/centralized-kubecost/2.5.0/release/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ spec:
valuesFrom:
- kind: ConfigMap
name: centralized-kubecost-2.5.0-d2iq-defaults
- kind: ConfigMap
name: centralized-kubecost-overrides
optional: true
targetNamespace: kubecost
---
apiVersion: v1
Expand Down
Loading

0 comments on commit cb540e7

Please sign in to comment.