Skip to content

Commit

Permalink
confgenerator: make instance an optional targetLabel
Browse files Browse the repository at this point in the history
This change does the following:
- Adds `instance` to the list of configurable targetLabels. It used to
  be non-configurable but now it is, keeping it consistent with
  PodMonitoring.
- Rename the curated metric labels to be more consistent with https://cloud.google.com/monitoring/api/resources#tag_cloud_run_revision
  This will make joins with those metrics easier.
- Disallow using `instanceId` as a source label in relabeling rules.
  This is important because this label is added later in the pipeline.
- Add unit tests.
- Make self metrics add no additional labels other than the resource
  labels.

Change-Id: I5acd7ef51ec5f33ce0e8532a2e1e54b91bfaa63d
Signed-off-by: Ridwan Sharif <[email protected]>
  • Loading branch information
ridwanmsharif committed Nov 30, 2023
1 parent d3ef1ce commit 65f8d7b
Show file tree
Hide file tree
Showing 11 changed files with 179 additions and 90 deletions.
8 changes: 7 additions & 1 deletion confgenerator/agentmetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,13 @@ func (r AgentSelfMetrics) OTelReceiverPipeline() otel.ReceiverPipeline {
otel.AggregateLabels("sum", "status"),
),
),
otel.TransformationMetrics(otel.AddMetricLabel("namespace", r.Service), otel.AddMetricLabel("cluster", "__run__")),
// Add appropriate resource and metric labels.
otel.GCPResourceDetector(),
otel.TransformationMetrics(
otel.AddMetricLabel("namespace", r.Service),
otel.AddMetricLabel("cluster", "__run__"),
otel.PrefixResourceAttribute("service.instance.id", "faas.id", ":"),
),
otel.GroupByGMPAttrs(),
},
}
Expand Down
44 changes: 36 additions & 8 deletions confgenerator/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ type ScrapeEndpoint struct {
Timeout string `yaml:"timeout,omitempty"`
// Relabeling rules for metrics scraped from this endpoint. Relabeling rules
// that override protected target labels (project_id, location, cluster,
// namespace, job, cloud_run_instance, or __address__) are not permitted.
// namespace, job, instance, instanceId or __address__) are not permitted.
MetricRelabeling []RelabelingRule `yaml:"metricRelabeling,omitempty"`
}

Expand Down Expand Up @@ -118,10 +118,19 @@ type ScrapeLimits struct {
LabelValueLength uint64 `yaml:"labelValueLength,omitempty"`
}

var allowedTargetMetadata = []string{"revision", "service", "configuration"}
var allowedTargetMetadata = []string{"instance", "revision", "service", "configuration"}

const kind = "RunMonitoring"
const apiVersion = "monitoring.googleapis.com/v1beta"
const (
kind = "RunMonitoring"
apiVersion = "monitoring.googleapis.com/v1beta"

// Metric label names that will be added to metrics based on the
// RunTargetLabels.Metadata configuration.
cloudRunInstanceLabel = "instanceId"
cloudRunServiceLabel = "service_name"
cloudRunRevisionLabel = "revision_name"
cloudRunConfigurationLabel = "configuration_name"
)

// DefaultRunMonitoringConfig creates a config that will be used by default if
// no user config (or an empty one) is found. It scrapes the default location of
Expand Down Expand Up @@ -192,6 +201,21 @@ func (rc *RunMonitoringConfig) OTelReceiverPipeline() (*otel.ReceiverPipeline, e
if err != nil {
return nil, err
}

// Prefix the `instance` resource label with the faas.id.
processors := []otel.Component{
otel.GCPResourceDetector(),
otel.TransformationMetrics(otel.PrefixResourceAttribute("service.instance.id", "faas.id", ":")),
}

// If the user configured the instance metadata to be added, add it as a metric label.
if rc.Spec.TargetLabels.Metadata != nil && contains(*rc.Spec.TargetLabels.Metadata, "instance") {
processors = append(processors, otel.TransformationMetrics(otel.FlattenResourceAttribute("faas.id", cloudRunInstanceLabel)))
}

// Group by the GMP attributes.
processors = append(processors, otel.GroupByGMPAttrs())

return &otel.ReceiverPipeline{
Receiver: otel.Component{
Type: "prometheus",
Expand All @@ -205,7 +229,7 @@ func (rc *RunMonitoringConfig) OTelReceiverPipeline() (*otel.ReceiverPipeline, e
},
},
},
Processors: []otel.Component{otel.GroupByGMPAttrs()},
Processors: processors,
}, nil
}

Expand Down Expand Up @@ -264,23 +288,23 @@ func relabelingsForMetadata(keys map[string]struct{}, env *CloudRunEnvironment)
Action: relabel.Replace,
SourceLabels: prommodel.LabelNames{"__address__"},
Replacement: env.Service,
TargetLabel: "cloud_run_service",
TargetLabel: cloudRunServiceLabel,
})
}
if _, ok := keys["revision"]; ok {
res = append(res, &relabel.Config{
Action: relabel.Replace,
SourceLabels: prommodel.LabelNames{"__address__"},
Replacement: env.Revision,
TargetLabel: "cloud_run_revision",
TargetLabel: cloudRunRevisionLabel,
})
}
if _, ok := keys["configuration"]; ok {
res = append(res, &relabel.Config{
Action: relabel.Replace,
SourceLabels: prommodel.LabelNames{"__address__"},
Replacement: env.Configuration,
TargetLabel: "cloud_run_configuration",
TargetLabel: cloudRunConfigurationLabel,
})
}
return res
Expand Down Expand Up @@ -390,6 +414,10 @@ func endpointScrapeConfig(id string, ep ScrapeEndpoint, relabelCfgs []*relabel.C
// convertRelabelingRule converts the rule to a relabel configuration. An error is returned
// if the rule would modify one of the protected labels.
func convertRelabelingRule(r RelabelingRule) (*relabel.Config, error) {
if contains(r.SourceLabels, cloudRunInstanceLabel) {
return nil, fmt.Errorf("cannot relabel with action %q using source label %q", r.Action, cloudRunInstanceLabel)
}

rcfg := &relabel.Config{
// Upstream applies ToLower when digesting the config, so we allow the same.
Action: relabel.Action(strings.ToLower(r.Action)),
Expand Down
10 changes: 0 additions & 10 deletions confgenerator/otel/modular.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,16 +86,6 @@ func (c ModularConfig) Generate() (string, error) {
var processorNames []string
processorNames = append(processorNames, receiverProcessorNames...)

// Add the resource detector
processors["resourcedetection"] = GCPResourceDetector().Config
processorNames = append(processorNames, "resourcedetection")

// Add the serverless instance id as a metric label
transformProcessor := TransformationMetrics(FlattenResourceAttribute("faas.id", "cloud_run_instance"), PrefixResourceAttribute("service.instance.id", "cloud_run_instance", ":"))

processors["transform/instance"] = transformProcessor.Config
processorNames = append(processorNames, "transform/instance")

exporters["googlemanagedprometheus"] = c.Exporter.Config
pipelines["metrics/"+key] = map[string]interface{}{
"receivers": []string{receiverName},
Expand Down
6 changes: 3 additions & 3 deletions confgenerator/otel/processors.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,15 +179,15 @@ func FlattenResourceAttribute(resourceAttribute, metricAttribute string) Transfo
return TransformQuery(fmt.Sprintf(`set(attributes["%s"], resource.attributes["%s"])`, metricAttribute, resourceAttribute))
}

// PrefixResourceAttribute prefixes the resource attribute with another metric
// PrefixResourceAttribute prefixes the resource attribute with another resource
// attribute.
//
// Note: Mutating the resource attribute results in this update happening for
// each data point. Since the OTTL statement uses the resource attribute in
// both the target and the source labels, we must make sure that after the first
// mutation, the subsequent transformations for the same resource are no-ops.
func PrefixResourceAttribute(resourceAttribute, metricAttribute, delimiter string) TransformQuery {
return TransformQuery(fmt.Sprintf(`replace_pattern(resource.attributes["%s"], "^(\\d+)$$", Concat([attributes["%s"], "$$1"], "%s"))`, resourceAttribute, metricAttribute, delimiter))
func PrefixResourceAttribute(destResourceAttribute, srcResourceAttribute, delimiter string) TransformQuery {
return TransformQuery(fmt.Sprintf(`replace_pattern(resource.attributes["%s"], "^(\\d+)$$", Concat([resource.attributes["%s"], "$$1"], "%s"))`, destResourceAttribute, srcResourceAttribute, delimiter))
}

// AddMetricLabel adds a new metric attribute. If it already exists, then it is overwritten.
Expand Down
34 changes: 19 additions & 15 deletions confgenerator/testdata/add-metadata-labels/golden/otel.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ processors:
- otelcol_process_memory_rss
- otelcol_grpc_io_client_completed_rpcs
- otelcol_googlecloudmonitoring_point_count
groupbyattrs/application-metrics_0:
groupbyattrs/application-metrics_2:
keys:
- namespace
- cluster
groupbyattrs/run-gmp-self-metrics_3:
groupbyattrs/run-gmp-self-metrics_4:
keys:
- namespace
- cluster
Expand Down Expand Up @@ -66,23 +66,28 @@ processors:
aggregation_type: sum
label_set:
- status
resourcedetection:
resourcedetection/application-metrics_0:
detectors:
- gcp
- env
transform/instance:
resourcedetection/run-gmp-self-metrics_2:
detectors:
- gcp
- env
transform/application-metrics_1:
metric_statements:
context: datapoint
statements:
- set(attributes["cloud_run_instance"], resource.attributes["faas.id"])
- replace_pattern(resource.attributes["service.instance.id"], "^(\\d+)$$", Concat([attributes["cloud_run_instance"],
- replace_pattern(resource.attributes["service.instance.id"], "^(\\d+)$$", Concat([resource.attributes["faas.id"],
"$$1"], ":"))
transform/run-gmp-self-metrics_2:
transform/run-gmp-self-metrics_3:
metric_statements:
context: datapoint
statements:
- set(attributes["namespace"], "test_service")
- set(attributes["cluster"], "__run__")
- replace_pattern(resource.attributes["service.instance.id"], "^(\\d+)$$", Concat([resource.attributes["faas.id"],
"$$1"], ":"))
receivers:
prometheus/application-metrics:
allow_cumulative_resets: true
Expand All @@ -98,7 +103,7 @@ receivers:
enable_http2: false
relabel_configs:
- source_labels: [__address__]
target_label: cloud_run_service
target_label: service_name
replacement: test_service
action: replace
- source_labels: [__address__]
Expand Down Expand Up @@ -139,9 +144,9 @@ service:
exporters:
- googlemanagedprometheus
processors:
- groupbyattrs/application-metrics_0
- resourcedetection
- transform/instance
- resourcedetection/application-metrics_0
- transform/application-metrics_1
- groupbyattrs/application-metrics_2
receivers:
- prometheus/application-metrics
metrics/run-gmp-self-metrics:
Expand All @@ -150,10 +155,9 @@ service:
processors:
- filter/run-gmp-self-metrics_0
- metricstransform/run-gmp-self-metrics_1
- transform/run-gmp-self-metrics_2
- groupbyattrs/run-gmp-self-metrics_3
- resourcedetection
- transform/instance
- resourcedetection/run-gmp-self-metrics_2
- transform/run-gmp-self-metrics_3
- groupbyattrs/run-gmp-self-metrics_4
receivers:
- prometheus/run-gmp-self-metrics
telemetry:
Expand Down
44 changes: 27 additions & 17 deletions confgenerator/testdata/builtin/golden/otel.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ processors:
- otelcol_process_memory_rss
- otelcol_grpc_io_client_completed_rpcs
- otelcol_googlecloudmonitoring_point_count
groupbyattrs/application-metrics_0:
groupbyattrs/application-metrics_3:
keys:
- namespace
- cluster
groupbyattrs/run-gmp-self-metrics_3:
groupbyattrs/run-gmp-self-metrics_4:
keys:
- namespace
- cluster
Expand Down Expand Up @@ -66,23 +66,33 @@ processors:
aggregation_type: sum
label_set:
- status
resourcedetection:
resourcedetection/application-metrics_0:
detectors:
- gcp
- env
transform/instance:
resourcedetection/run-gmp-self-metrics_2:
detectors:
- gcp
- env
transform/application-metrics_1:
metric_statements:
context: datapoint
statements:
- set(attributes["cloud_run_instance"], resource.attributes["faas.id"])
- replace_pattern(resource.attributes["service.instance.id"], "^(\\d+)$$", Concat([attributes["cloud_run_instance"],
- replace_pattern(resource.attributes["service.instance.id"], "^(\\d+)$$", Concat([resource.attributes["faas.id"],
"$$1"], ":"))
transform/run-gmp-self-metrics_2:
transform/application-metrics_2:
metric_statements:
context: datapoint
statements:
- set(attributes["instanceId"], resource.attributes["faas.id"])
transform/run-gmp-self-metrics_3:
metric_statements:
context: datapoint
statements:
- set(attributes["namespace"], "test_service")
- set(attributes["cluster"], "__run__")
- replace_pattern(resource.attributes["service.instance.id"], "^(\\d+)$$", Concat([resource.attributes["faas.id"],
"$$1"], ":"))
receivers:
prometheus/application-metrics:
allow_cumulative_resets: true
Expand All @@ -97,15 +107,15 @@ receivers:
enable_http2: false
relabel_configs:
- source_labels: [__address__]
target_label: cloud_run_service
target_label: service_name
replacement: test_service
action: replace
- source_labels: [__address__]
target_label: cloud_run_revision
target_label: revision_name
replacement: test_revision
action: replace
- source_labels: [__address__]
target_label: cloud_run_configuration
target_label: configuration_name
replacement: test_configuration
action: replace
- source_labels: [__address__]
Expand Down Expand Up @@ -146,9 +156,10 @@ service:
exporters:
- googlemanagedprometheus
processors:
- groupbyattrs/application-metrics_0
- resourcedetection
- transform/instance
- resourcedetection/application-metrics_0
- transform/application-metrics_1
- transform/application-metrics_2
- groupbyattrs/application-metrics_3
receivers:
- prometheus/application-metrics
metrics/run-gmp-self-metrics:
Expand All @@ -157,10 +168,9 @@ service:
processors:
- filter/run-gmp-self-metrics_0
- metricstransform/run-gmp-self-metrics_1
- transform/run-gmp-self-metrics_2
- groupbyattrs/run-gmp-self-metrics_3
- resourcedetection
- transform/instance
- resourcedetection/run-gmp-self-metrics_2
- transform/run-gmp-self-metrics_3
- groupbyattrs/run-gmp-self-metrics_4
receivers:
- prometheus/run-gmp-self-metrics
telemetry:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
invalid definition for endpoint with index 0: cannot relabel with action "replace" using source label "instanceId"
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

apiVersion: monitoring.googleapis.com/v1beta
kind: RunMonitoring
metadata:
name: mycollector
labels:
run-app: mycollector
type: mytype
spec:
endpoints:
- port: 8080
interval: 10s
metricRelabeling:
- action: replace
sourceLabels:
- instanceId
targetLabel: target
Loading

0 comments on commit 65f8d7b

Please sign in to comment.