-
Notifications
You must be signed in to change notification settings - Fork 3.9k
otel: subchannel metrics #12202
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
otel: subchannel metrics #12202
Changes from all commits
a06568c
daf901b
735665b
a664b2f
2ce087e
2293e7f
623c0f9
c713561
ca8e9ed
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
/* | ||
* Copyright 2025 The gRPC Authors | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package io.grpc; | ||
|
||
import java.util.List; | ||
|
||
/** | ||
* Represents a long-valued up down counter metric instrument. | ||
*/ | ||
@Internal | ||
public final class LongUpDownCounterMetricInstrument extends PartialMetricInstrument { | ||
public LongUpDownCounterMetricInstrument(int index, String name, String description, String unit, | ||
List<String> requiredLabelKeys, | ||
List<String> optionalLabelKeys, | ||
boolean enableByDefault) { | ||
super(index, name, description, unit, requiredLabelKeys, optionalLabelKeys, enableByDefault); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -48,6 +48,9 @@ | |
import io.grpc.LoadBalancer; | ||
import io.grpc.Metadata; | ||
import io.grpc.MethodDescriptor; | ||
import io.grpc.MetricRecorder; | ||
import io.grpc.NameResolver; | ||
import io.grpc.SecurityLevel; | ||
import io.grpc.Status; | ||
import io.grpc.SynchronizationContext; | ||
import io.grpc.SynchronizationContext.ScheduledHandle; | ||
|
@@ -160,6 +163,8 @@ protected void handleNotInUse() { | |
private Status shutdownReason; | ||
|
||
private volatile Attributes connectedAddressAttributes; | ||
private final SubchannelMetrics subchannelMetrics; | ||
private final String target; | ||
|
||
InternalSubchannel(LoadBalancer.CreateSubchannelArgs args, String authority, String userAgent, | ||
BackoffPolicy.Provider backoffPolicyProvider, | ||
|
@@ -168,7 +173,9 @@ protected void handleNotInUse() { | |
Supplier<Stopwatch> stopwatchSupplier, SynchronizationContext syncContext, | ||
Callback callback, InternalChannelz channelz, CallTracer callsTracer, | ||
ChannelTracer channelTracer, InternalLogId logId, | ||
ChannelLogger channelLogger, List<ClientTransportFilter> transportFilters) { | ||
ChannelLogger channelLogger, List<ClientTransportFilter> transportFilters, | ||
String target, | ||
MetricRecorder metricRecorder) { | ||
List<EquivalentAddressGroup> addressGroups = args.getAddresses(); | ||
Preconditions.checkNotNull(addressGroups, "addressGroups"); | ||
Preconditions.checkArgument(!addressGroups.isEmpty(), "addressGroups is empty"); | ||
|
@@ -192,6 +199,8 @@ protected void handleNotInUse() { | |
this.channelLogger = Preconditions.checkNotNull(channelLogger, "channelLogger"); | ||
this.transportFilters = transportFilters; | ||
this.reconnectDisabled = args.getOption(LoadBalancer.DISABLE_SUBCHANNEL_RECONNECT_KEY); | ||
this.target = target; | ||
this.subchannelMetrics = new SubchannelMetrics(metricRecorder); | ||
} | ||
|
||
ChannelLogger getChannelLogger() { | ||
|
@@ -593,6 +602,15 @@ public void run() { | |
pendingTransport = null; | ||
connectedAddressAttributes = addressIndex.getCurrentEagAttributes(); | ||
gotoNonErrorState(READY); | ||
subchannelMetrics.recordConnectionAttemptSucceeded(buildLabelSet( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Label for target? Also for disconnections and connection attempt failures below. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. buildLabelSet() gets it from the class var |
||
getAttributeOrDefault( | ||
addressIndex.getCurrentEagAttributes(), NameResolver.ATTR_BACKEND_SERVICE), | ||
getAttributeOrDefault( | ||
addressIndex.getCurrentEagAttributes(), LoadBalancer.ATTR_LOCALITY_NAME), | ||
null, | ||
extractSecurityLevel( | ||
addressIndex.getCurrentEagAttributes().get(GrpcAttributes.ATTR_SECURITY_LEVEL)) | ||
)); | ||
} | ||
} | ||
}); | ||
|
@@ -618,11 +636,27 @@ public void run() { | |
activeTransport = null; | ||
addressIndex.reset(); | ||
gotoNonErrorState(IDLE); | ||
subchannelMetrics.recordDisconnection(buildLabelSet( | ||
getAttributeOrDefault( | ||
addressIndex.getCurrentEagAttributes(), NameResolver.ATTR_BACKEND_SERVICE), | ||
getAttributeOrDefault( | ||
addressIndex.getCurrentEagAttributes(), LoadBalancer.ATTR_LOCALITY_NAME), | ||
"Peer Pressure", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not in the gRFC? Instead there is a "List of allowed values for grpc.disconnect_error". There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For Phase 1 we won't be plumbing disconnect_error, will raise another PR with this as the base branch for the same |
||
extractSecurityLevel( | ||
addressIndex.getCurrentEagAttributes().get(GrpcAttributes.ATTR_SECURITY_LEVEL)) | ||
)); | ||
} else if (pendingTransport == transport) { | ||
subchannelMetrics.recordConnectionAttemptFailed(buildLabelSet( | ||
getAttributeOrDefault( | ||
addressIndex.getCurrentEagAttributes(), NameResolver.ATTR_BACKEND_SERVICE), | ||
getAttributeOrDefault( | ||
addressIndex.getCurrentEagAttributes(), LoadBalancer.ATTR_LOCALITY_NAME), | ||
null, null | ||
)); | ||
Preconditions.checkState(state.getState() == CONNECTING, | ||
"Expected state is CONNECTING, actual state is %s", state.getState()); | ||
addressIndex.increment(); | ||
// Continue reconnect if there are still addresses to try. | ||
// Continue to reconnect if there are still addresses to try. | ||
if (!addressIndex.isValid()) { | ||
pendingTransport = null; | ||
addressIndex.reset(); | ||
|
@@ -658,6 +692,27 @@ public void run() { | |
} | ||
}); | ||
} | ||
|
||
private String extractSecurityLevel(SecurityLevel securityLevel) { | ||
if (securityLevel == null) { | ||
return "none"; | ||
} | ||
switch (securityLevel) { | ||
case NONE: | ||
return "none"; | ||
case INTEGRITY: | ||
return "integrity_only"; | ||
case PRIVACY_AND_INTEGRITY: | ||
return "privacy_and_integrity"; | ||
default: | ||
throw new IllegalArgumentException("Unknown SecurityLevel: " + securityLevel); | ||
} | ||
} | ||
|
||
private String getAttributeOrDefault(Attributes attributes, Attributes.Key<String> key) { | ||
String value = attributes.get(key); | ||
return value == null ? "" : value; | ||
} | ||
} | ||
|
||
// All methods are called in syncContext | ||
|
@@ -817,6 +872,18 @@ private String printShortStatus(Status status) { | |
return buffer.toString(); | ||
} | ||
|
||
private OtelMetricsAttributes buildLabelSet(String backendService, String locality, | ||
String disconnectError, String securityLevel) { | ||
return new OtelMetricsAttributes( | ||
target, | ||
backendService, | ||
locality, | ||
disconnectError, | ||
securityLevel | ||
); | ||
} | ||
|
||
|
||
@VisibleForTesting | ||
static final class TransportLogger extends ChannelLogger { | ||
// Changed just after construction to break a cyclic dependency. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -415,7 +415,7 @@ void exitIdleMode() { | |
LbHelperImpl lbHelper = new LbHelperImpl(); | ||
lbHelper.lb = loadBalancerFactory.newLoadBalancer(lbHelper); | ||
// Delay setting lbHelper until fully initialized, since loadBalancerFactory is user code and | ||
// may throw. We don't want to confuse our state, even if we will enter panic mode. | ||
// may throw. We don't want to confuse our state, even if we enter panic mode. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the comment is more accurate as it is. After entering panic mode there is nothing much we can do about state, the comment implies delaying entering that panic mode and maintaining a sane state for the channel for as long as possible before bringing in potential user code. |
||
this.lbHelper = lbHelper; | ||
|
||
channelStateManager.gotoState(CONNECTING); | ||
|
@@ -1464,7 +1464,9 @@ void onStateChange(InternalSubchannel is, ConnectivityStateInfo newState) { | |
subchannelTracer, | ||
subchannelLogId, | ||
subchannelLogger, | ||
transportFilters); | ||
transportFilters, | ||
target, | ||
lbHelper.getMetricRecorder()); | ||
oobChannelTracer.reportEvent(new ChannelTrace.Event.Builder() | ||
.setDescription("Child Subchannel created") | ||
.setSeverity(ChannelTrace.Event.Severity.CT_INFO) | ||
|
@@ -1895,7 +1897,8 @@ void onNotInUse(InternalSubchannel is) { | |
subchannelTracer, | ||
subchannelLogId, | ||
subchannelLogger, | ||
transportFilters); | ||
transportFilters, target, | ||
lbHelper.getMetricRecorder()); | ||
|
||
channelTracer.reportEvent(new ChannelTrace.Event.Builder() | ||
.setDescription("Child Subchannel started") | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,6 +26,7 @@ | |
import io.grpc.LongCounterMetricInstrument; | ||
import io.grpc.LongGaugeMetricInstrument; | ||
import io.grpc.LongHistogramMetricInstrument; | ||
import io.grpc.LongUpDownCounterMetricInstrument; | ||
import io.grpc.MetricInstrument; | ||
import io.grpc.MetricInstrumentRegistry; | ||
import io.grpc.MetricRecorder; | ||
|
@@ -82,7 +83,7 @@ public void addDoubleCounter(DoubleCounterMetricInstrument metricInstrument, dou | |
* Records a long counter value. | ||
* | ||
* @param metricInstrument the {@link LongCounterMetricInstrument} to record. | ||
* @param value the value to record. | ||
* @param value the value to record. Must be non-negative. | ||
* @param requiredLabelValues the required label values for the metric. | ||
* @param optionalLabelValues the optional label values for the metric. | ||
*/ | ||
|
@@ -103,6 +104,32 @@ public void addLongCounter(LongCounterMetricInstrument metricInstrument, long va | |
} | ||
} | ||
|
||
/** | ||
* Adds a long up down counter value. | ||
* | ||
* @param metricInstrument the {@link io.grpc.LongUpDownCounterMetricInstrument} to record. | ||
* @param value the value to record. May be positive, negative or zero. | ||
* @param requiredLabelValues the required label values for the metric. | ||
* @param optionalLabelValues the optional label values for the metric. | ||
*/ | ||
@Override | ||
public void addLongUpDownCounter(LongUpDownCounterMetricInstrument metricInstrument, long value, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add unit tests. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These are just wrappers on the underlying OTel API for UpDownCounter... |
||
List<String> requiredLabelValues, | ||
List<String> optionalLabelValues) { | ||
MetricRecorder.super.addLongUpDownCounter(metricInstrument, value, requiredLabelValues, | ||
optionalLabelValues); | ||
for (MetricSink sink : metricSinks) { | ||
int measuresSize = sink.getMeasuresSize(); | ||
if (measuresSize <= metricInstrument.getIndex()) { | ||
// Measures may need updating in two cases: | ||
// 1. When the sink is initially created with an empty list of measures. | ||
// 2. When new metric instruments are registered, requiring the sink to accommodate them. | ||
sink.updateMeasures(registry.getMetricInstruments()); | ||
} | ||
sink.addLongUpDownCounter(metricInstrument, value, requiredLabelValues, optionalLabelValues); | ||
} | ||
} | ||
|
||
/** | ||
* Records a double histogram value. | ||
* | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add unit test.