Skip to content

Commit

Permalink
confgenerator: set monitored resource fields for ingested metrics
Browse files Browse the repository at this point in the history
This change does the following:
- Add unit tests
- Add instance id as a prefix to the `instance` label in the monitored
  resource
- Make instance a protected label. It can't be mutated using relabel
  configs.
- Add validation to the RunMonitoring config
- Add cluster and namespace to the agent self metrics

Change-Id: I4bc9d8dd577c0d95b5bca0aa6593644da7b3984b
Signed-off-by: Ridwan Sharif <[email protected]>
  • Loading branch information
ridwanmsharif committed Nov 24, 2023
1 parent 1757373 commit b78d95b
Show file tree
Hide file tree
Showing 17 changed files with 221 additions and 55 deletions.
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,10 @@ test:
test_verbose:
$(MAKE) GO_TEST_VERBOSE=-v test

.PHONY: test_update
test_update:
go test ./confgenerator -update

.PHONY: generate
generate:
go generate ./...
Expand Down
4 changes: 1 addition & 3 deletions clean-up-cloud-run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

set -ex

PROJECT_ID=$(gcloud config get-value project)
SA_NAME="run-gmp-sa"
REGION="us-east1"
Expand All @@ -24,4 +22,4 @@ gcloud secrets delete run-gmp-config
gcloud artifacts repositories delete run-gmp \
--location=${REGION} \
--quiet
gcloud iam service-accounts delete ${SA_NAME}@${PROJECT_ID}.iam.gserviceaccount.com
gcloud iam service-accounts delete ${SA_NAME}@${PROJECT_ID}.iam.gserviceaccount.com
9 changes: 9 additions & 0 deletions confgenerator/agentmetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (

// AgentSelfMetrics holds the settings used to build the receiver pipeline
// that scrapes the agent's own metrics (see OTelReceiverPipeline).
type AgentSelfMetrics struct {
	// Version is supplied by the caller as the metric version label.
	// NOTE(review): where it is consumed in the pipeline is not visible in
	// this excerpt; confirm before relying on it.
	Version string

	// Service is written as the "namespace" label on the self metrics.
	Service string

	// Port is the port of the self-metrics endpoint. It forms the scrape
	// target "0.0.0.0:<Port>" and is also used as the value of the
	// "instance" label.
	Port int
}

Expand All @@ -37,6 +38,12 @@ func (r AgentSelfMetrics) OTelReceiverPipeline() otel.ReceiverPipeline {
"static_configs": []map[string]interface{}{{
"targets": []string{fmt.Sprintf("0.0.0.0:%d", r.Port)},
}},
"metric_relabel_configs": []map[string]interface{}{{
"source_labels": []string{"__address__"},
"target_label": "instance",
"replacement": fmt.Sprintf("%d", r.Port),
"action": "replace",
}},
}},
},
},
Expand Down Expand Up @@ -81,6 +88,8 @@ func (r AgentSelfMetrics) OTelReceiverPipeline() otel.ReceiverPipeline {
otel.AggregateLabels("sum", "status"),
),
),
otel.TransformationMetrics(otel.AddMetricLabel("namespace", r.Service), otel.AddMetricLabel("cluster", "__run__")),
otel.GroupByGMPAttrs(),
},
}
}
Expand Down
1 change: 1 addition & 0 deletions confgenerator/confgenerator.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ func (rc *RunMonitoringConfig) GenerateOtelConfig(ctx context.Context) (string,
receiverPipelines["run-gmp-self-metrics"] = AgentSelfMetrics{
Version: metricVersionLabel,
Port: selfMetricsPort,
Service: rc.Env.Service,
}.OTelReceiverPipeline()

otelConfig, err := otel.ModularConfig{
Expand Down
32 changes: 28 additions & 4 deletions confgenerator/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,15 +120,17 @@ type ScrapeLimits struct {

var allowedTargetMetadata = []string{"revision", "service", "configuration"}

const kind = "RunMonitoring"
const apiVersion = "monitoring.googleapis.com/v1beta"

// DefaultRunMonitoringConfig creates a config that will be used by default if
// no user config (or an empty one) is found. It scrapes the default location of
// 0.0.0.0:8080/metrics for prometheus metrics.
func DefaultRunMonitoringConfig() *RunMonitoringConfig {

return &RunMonitoringConfig{
metav1.TypeMeta{
Kind: "RunMonitoring",
APIVersion: "monitoring.googleapis.com/v1beta",
Kind: kind,
APIVersion: apiVersion,
},
metav1.ObjectMeta{
Name: "run-gmp-sidecar",
Expand Down Expand Up @@ -176,6 +178,10 @@ func ReadConfigFromFile(ctx context.Context, path string) (*RunMonitoringConfig,
return nil, err
}

// Validate the RunMonitoring config
if err := config.Validate(); err != nil {
return nil, err
}
return config, nil
}

Expand Down Expand Up @@ -203,6 +209,18 @@ func (rc *RunMonitoringConfig) OTelReceiverPipeline() (*otel.ReceiverPipeline, e
}, nil
}

// Validate checks that the config declares the apiVersion and kind supported
// by this package. The apiVersion is checked first; the returned error names
// the first field that does not match. A nil error means both fields match.
func (rc *RunMonitoringConfig) Validate() error {
	switch {
	case rc.APIVersion != apiVersion:
		return fmt.Errorf("apiVersion must be %s", apiVersion)
	case rc.Kind != kind:
		return fmt.Errorf("kind must be %s", kind)
	default:
		return nil
	}
}

// scrapeConfigs converts the given RunMonitoringConfig to an equivalent set of Prometheus ScrapeConfigs.
func (rc *RunMonitoringConfig) scrapeConfigs() (res []*promconfig.ScrapeConfig, err error) {
for i := range rc.Spec.Endpoints {
Expand Down Expand Up @@ -292,11 +310,17 @@ func endpointScrapeConfig(id string, ep ScrapeEndpoint, relabelCfgs []*relabel.C
TargetLabel: "namespace",
Replacement: env.Service,
},
// The `instance` label will be <faas.id>:<port> in the final metric.
// But since <faas.id> is unavailable until the gcp resource detector
// runs later in the pipeline we just populate the port for now.
//
// See the usage of PrefixResourceAttribute for when the rest of the
// instance label is filled in.
&relabel.Config{
Action: relabel.Replace,
SourceLabels: prommodel.LabelNames{"__address__"},
TargetLabel: "instance",
Replacement: env.Revision + ":" + ep.Port,
Replacement: ep.Port,
},
)

Expand Down
7 changes: 2 additions & 5 deletions confgenerator/otel/modular.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,9 @@ func (c ModularConfig) Generate() (string, error) {
processors["resourcedetection"] = GCPResourceDetector().Config
processorNames = append(processorNames, "resourcedetection")

// Add serverless ID detector
processors["resource/serverless"] = AddResourceAttr("service.instance.id", "faas.id").Config
processorNames = append(processorNames, "resource/serverless")

// Add the serverless instance id as a metric label
transformProcessor := TransformationMetrics(FlattenResourceAttribute("faas.id", "cloud_run_instance"))
transformProcessor := TransformationMetrics(FlattenResourceAttribute("faas.id", "cloud_run_instance"), PrefixResourceAttribute("service.instance.id", "cloud_run_instance", ":"))

processors["transform/instance"] = transformProcessor.Config
processorNames = append(processorNames, "transform/instance")

Expand Down
33 changes: 28 additions & 5 deletions confgenerator/otel/processors.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,15 +103,22 @@ func AggregateLabels(aggregationType string, labels ...string) map[string]interf
}
}

// GroupByGMPAttrs returns a groupbyattrs processor that moves the "namespace"
// and "cluster" metric attributes to resource attributes.
//
// Metrics coming from run-gmp-sidecar are written against the
// `prometheus_target` monitored resource in Cloud Monitoring. The labels for
// that monitored resource come from the OTel resource labels, so this
// processor has to promote these metric labels to resource labels for the
// translation to happen correctly.
//
// See https://cloud.google.com/monitoring/api/resources#tag_prometheus_target
// for more information about the monitored resource used.
func GroupByGMPAttrs() Component {
	return Component{
		Type: "groupbyattrs",
		Config: map[string]interface{}{
			// A single "keys" entry: a map literal may not repeat a key.
			"keys": []string{"namespace", "cluster"},
		},
	}
}
Expand Down Expand Up @@ -171,3 +178,19 @@ type TransformQuery string
// FlattenResourceAttribute builds an OTTL statement that sets the metric
// attribute named metricAttribute to the value of the resource attribute
// named resourceAttribute. Both names are interpolated verbatim.
func FlattenResourceAttribute(resourceAttribute, metricAttribute string) TransformQuery {
	statement := fmt.Sprintf(`set(attributes["%s"], resource.attributes["%s"])`, metricAttribute, resourceAttribute)
	return TransformQuery(statement)
}

// PrefixResourceAttribute builds an OTTL statement that rewrites the given
// resource attribute to "<metric attribute value><delimiter><old value>".
//
// Note: the statement is evaluated for each data point, yet it both reads and
// writes the same resource attribute. To keep repeated evaluations on one
// resource from stacking prefixes, the pattern only matches values made up
// entirely of digits. Once the value has been prefixed, later evaluations are
// no-ops (provided the prefix or delimiter contains a non-digit character).
//
// The doubled `$$` is emitted verbatim; presumably it is the collector config
// escape for a literal `$` (TODO confirm).
func PrefixResourceAttribute(resourceAttribute, metricAttribute, delimiter string) TransformQuery {
	const statementFormat = `replace_pattern(resource.attributes["%s"], "^(\\d+)$$", Concat([attributes["%s"], "$$1"], "%s"))`
	return TransformQuery(fmt.Sprintf(statementFormat, resourceAttribute, metricAttribute, delimiter))
}

// AddMetricLabel builds an OTTL statement that sets the metric attribute key
// to the string literal val, overwriting any existing value. Both arguments
// are interpolated verbatim, without escaping.
func AddMetricLabel(key, val string) TransformQuery {
	const statementFormat = `set(attributes["%s"], "%s")`
	return TransformQuery(fmt.Sprintf(statementFormat, key, val))
}
30 changes: 21 additions & 9 deletions confgenerator/testdata/add-metadata-labels/golden/otel.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@ processors:
keys:
- namespace
- cluster
- location
groupbyattrs/run-gmp-self-metrics_3:
keys:
- namespace
- cluster
metricstransform/run-gmp-self-metrics_1:
transforms:
- action: update
Expand Down Expand Up @@ -61,11 +64,6 @@ processors:
aggregation_type: sum
label_set:
- status
resource/serverless:
attributes:
action: insert
from_attribute: faas.id
key: service.instance.id
resourcedetection:
detectors:
- gcp
Expand All @@ -75,6 +73,14 @@ processors:
context: datapoint
statements:
- set(attributes["cloud_run_instance"], resource.attributes["faas.id"])
- replace_pattern(resource.attributes["service.instance.id"], "^(\\d+)$$", Concat([attributes["cloud_run_instance"],
"$$1"], ":"))
transform/run-gmp-self-metrics_2:
metric_statements:
context: datapoint
statements:
- set(attributes["namespace"], "test_service")
- set(attributes["cluster"], "__run__")
receivers:
prometheus/application-metrics:
allow_cumulative_resets: true
Expand Down Expand Up @@ -103,7 +109,7 @@ receivers:
action: replace
- source_labels: [__address__]
target_label: instance
replacement: test_revision:8080
replacement: "8080"
action: replace
static_configs:
- targets:
Expand All @@ -115,6 +121,12 @@ receivers:
config:
scrape_configs:
- job_name: run-gmp-sidecar
metric_relabel_configs:
- action: replace
replacement: "42"
source_labels:
- __address__
target_label: instance
scrape_interval: 1m
static_configs:
- targets:
Expand All @@ -127,7 +139,6 @@ service:
processors:
- groupbyattrs/application-metrics_0
- resourcedetection
- resource/serverless
- transform/instance
receivers:
- prometheus/application-metrics
Expand All @@ -137,8 +148,9 @@ service:
processors:
- filter/run-gmp-self-metrics_0
- metricstransform/run-gmp-self-metrics_1
- transform/run-gmp-self-metrics_2
- groupbyattrs/run-gmp-self-metrics_3
- resourcedetection
- resource/serverless
- transform/instance
receivers:
- prometheus/run-gmp-self-metrics
Expand Down
30 changes: 21 additions & 9 deletions confgenerator/testdata/builtin/golden/otel.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@ processors:
keys:
- namespace
- cluster
- location
groupbyattrs/run-gmp-self-metrics_3:
keys:
- namespace
- cluster
metricstransform/run-gmp-self-metrics_1:
transforms:
- action: update
Expand Down Expand Up @@ -61,11 +64,6 @@ processors:
aggregation_type: sum
label_set:
- status
resource/serverless:
attributes:
action: insert
from_attribute: faas.id
key: service.instance.id
resourcedetection:
detectors:
- gcp
Expand All @@ -75,6 +73,14 @@ processors:
context: datapoint
statements:
- set(attributes["cloud_run_instance"], resource.attributes["faas.id"])
- replace_pattern(resource.attributes["service.instance.id"], "^(\\d+)$$", Concat([attributes["cloud_run_instance"],
"$$1"], ":"))
transform/run-gmp-self-metrics_2:
metric_statements:
context: datapoint
statements:
- set(attributes["namespace"], "test_service")
- set(attributes["cluster"], "__run__")
receivers:
prometheus/application-metrics:
allow_cumulative_resets: true
Expand Down Expand Up @@ -110,7 +116,7 @@ receivers:
action: replace
- source_labels: [__address__]
target_label: instance
replacement: test_revision:8080
replacement: "8080"
action: replace
static_configs:
- targets:
Expand All @@ -122,6 +128,12 @@ receivers:
config:
scrape_configs:
- job_name: run-gmp-sidecar
metric_relabel_configs:
- action: replace
replacement: "42"
source_labels:
- __address__
target_label: instance
scrape_interval: 1m
static_configs:
- targets:
Expand All @@ -134,7 +146,6 @@ service:
processors:
- groupbyattrs/application-metrics_0
- resourcedetection
- resource/serverless
- transform/instance
receivers:
- prometheus/application-metrics
Expand All @@ -144,8 +155,9 @@ service:
processors:
- filter/run-gmp-self-metrics_0
- metricstransform/run-gmp-self-metrics_1
- transform/run-gmp-self-metrics_2
- groupbyattrs/run-gmp-self-metrics_3
- resourcedetection
- resource/serverless
- transform/instance
receivers:
- prometheus/run-gmp-self-metrics
Expand Down
1 change: 1 addition & 0 deletions confgenerator/testdata/invalid-apiversion/golden/error
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
apiVersion must be monitoring.googleapis.com/v1beta
25 changes: 25 additions & 0 deletions confgenerator/testdata/invalid-apiversion/input.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

apiVersion: invalid_api_version
kind: RunMonitoring
metadata:
name: mycollector
labels:
run-app: mycollector
type: mytype
spec:
endpoints:
- port: 8080
interval: 10s
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
invalid definition for endpoint with index 0: cannot relabel with action "replace" onto protected label "instance"
Loading

0 comments on commit b78d95b

Please sign in to comment.