Skip to content

Merge span normalizer into grouper #384

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 0 additions & 13 deletions hypertrace-ingester/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ dependencies {
implementation(project(":span-normalizer:span-normalizer-constants"))
implementation(project(":span-normalizer:raw-span-constants"))
implementation(project(":semantic-convention-utils"))
implementation(project(":span-normalizer:span-normalizer"))
implementation(project(":raw-spans-grouper:raw-spans-grouper"))
implementation(project(":hypertrace-trace-enricher:hypertrace-trace-enricher"))
implementation(project(":hypertrace-view-generator:hypertrace-view-generator"))
Expand Down Expand Up @@ -65,12 +64,6 @@ tasks.processResources {

tasks.register<Copy>("copyServiceConfigs") {
with(
createCopySpec(
"span-normalizer",
"span-normalizer",
"main",
"common"
),
createCopySpec(
"raw-spans-grouper",
"raw-spans-grouper",
Expand Down Expand Up @@ -143,12 +136,6 @@ tasks.test {

tasks.register<Copy>("copyServiceConfigsTest") {
with(
createCopySpec(
"span-normalizer",
"span-normalizer",
"test",
"span-normalizer"
),
createCopySpec(
"raw-spans-grouper",
"raw-spans-grouper",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import org.hypertrace.core.serviceframework.config.ConfigClient;
import org.hypertrace.core.serviceframework.config.ConfigClientFactory;
import org.hypertrace.core.serviceframework.config.ConfigUtils;
import org.hypertrace.core.spannormalizer.SpanNormalizer;
import org.hypertrace.core.viewgenerator.service.MultiViewGeneratorLauncher;
import org.hypertrace.metrics.exporter.MetricsExporterService;
import org.hypertrace.metrics.generator.MetricsGenerator;
Expand Down Expand Up @@ -52,9 +51,6 @@ public HypertraceIngester(ConfigClient configClient) {
private KafkaStreamsApp getSubTopologyInstance(String name) {
KafkaStreamsApp kafkaStreamsApp;
switch (name) {
case "span-normalizer":
kafkaStreamsApp = new SpanNormalizer(ConfigClientFactory.getClient());
break;
case "raw-spans-grouper":
kafkaStreamsApp = new RawSpansGrouper(ConfigClientFactory.getClient());
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ service.name = hypertrace-ingester
service.admin.port = 8099

sub.topology.names = [
"span-normalizer",
"raw-spans-grouper",
"hypertrace-trace-enricher",
"all-views"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import com.google.protobuf.ByteString;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
import io.jaegertracing.api_v2.JaegerSpanInternalModel.Span;
import io.jaegertracing.api_v2.JaegerSpanInternalModel;
import java.io.File;
import java.nio.file.Path;
import java.time.Duration;
Expand Down Expand Up @@ -36,7 +36,6 @@ public class HypertraceIngesterTest {
private static final String CONFIG_PATH = "configs/%s/application.conf";
private HypertraceIngester underTest;
private Config underTestConfig;
private Config spanNormalizerConfig;
private Config rawSpansGrouperConfig;
private Config traceEnricherConfig;
private Config spanEventViewGeneratorConfig;
Expand All @@ -45,7 +44,6 @@ public class HypertraceIngesterTest {
public void setUp() {
underTest = new HypertraceIngester(ConfigClientFactory.getClient());
underTestConfig = getConfig("hypertrace-ingester");
spanNormalizerConfig = getConfig("span-normalizer");
rawSpansGrouperConfig = getConfig("raw-spans-grouper");
traceEnricherConfig = getConfig("hypertrace-trace-enricher");
spanEventViewGeneratorConfig = getConfig("view-gen-span-event");
Expand All @@ -71,30 +69,26 @@ public void testIngestionPacketFlow(@TempDir Path tempDir) {
// create topology test driver for ingester
TopologyTestDriver topologyTestDriver = new TopologyTestDriver(streamsBuilder.build(), props);

Span span =
Span.newBuilder()
JaegerSpanInternalModel.Span span =
JaegerSpanInternalModel.Span.newBuilder()
.setSpanId(ByteString.copyFrom("1".getBytes()))
.setTraceId(ByteString.copyFrom("trace-1".getBytes()))
.addTags(
JaegerSpanInternalModel.KeyValue.newBuilder()
.setKey("tenant-id")
.setVStr("tenant-1")
.build())
.build();

TestInputTopic<byte[], Span> spanNormalizerInputTopic =
TestInputTopic<byte[], JaegerSpanInternalModel.Span> spanGrouperInputTopic =
topologyTestDriver.createInputTopic(
spanNormalizerConfig.getString(
rawSpansGrouperConfig.getString(
org.hypertrace.core.spannormalizer.constants.SpanNormalizerConstants
.INPUT_TOPIC_CONFIG_KEY),
Serdes.ByteArray().serializer(),
new JaegerSpanSerde().serializer());

spanNormalizerInputTopic.pipeInput(span);

// create output topic for span-normalizer topology
TestOutputTopic spanNormalizerOutputTopic =
topologyTestDriver.createOutputTopic(
spanNormalizerConfig.getString(
StructuredTraceEnricherConstants.OUTPUT_TOPIC_CONFIG_KEY),
Serdes.String().deserializer(),
new AvroSerde<>().deserializer());
assertNotNull(spanNormalizerOutputTopic.readKeyValue());
spanGrouperInputTopic.pipeInput(span);

topologyTestDriver.advanceWallClockTime(Duration.ofSeconds(32));

Expand All @@ -107,14 +101,6 @@ public void testIngestionPacketFlow(@TempDir Path tempDir) {
new AvroSerde<>().deserializer());
assertNotNull(spanGrouperOutputTopic.readKeyValue());

// create output topic for trace-enricher topology
TestOutputTopic traceEnricherOutputTopic =
topologyTestDriver.createOutputTopic(
traceEnricherConfig.getString(StructuredTraceEnricherConstants.OUTPUT_TOPIC_CONFIG_KEY),
Serdes.String().deserializer(),
new AvroSerde<>().deserializer());
assertNotNull(traceEnricherOutputTopic.readKeyValue());

// create output topic for topology
TestOutputTopic spanEventViewOutputTopic =
topologyTestDriver.createOutputTopic(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ main.class = org.hypertrace.ingester.HypertraceIngester
service.name = hypertrace-ingester
service.admin.port = 8099

sub.topology.names = ["span-normalizer", "raw-spans-grouper", "hypertrace-trace-enricher", "all-views"]
sub.topology.names = ["raw-spans-grouper", "hypertrace-trace-enricher", "all-views"]

precreate.topics = false

Expand Down
80 changes: 80 additions & 0 deletions raw-spans-grouper/helm/templates/raw-spans-grouper-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,83 @@ data:
}
}
{{- end }}

# span normalizer config
{{- if hasKey .Values.spanNormalizerConfig "processor" }}
processor {
{{- if hasKey .Values.spanNormalizerConfig.processor "tenantIdTagKey" }}
tenantIdTagKey = "{{ .Values.spanNormalizerConfig.processor.tenantIdTagKey }}"
{{- end }}

{{- if hasKey .Values.spanNormalizerConfig.processor "excludeTenantIds" }}
excludeTenantIds = {{ .Values.spanNormalizerConfig.processor.excludeTenantIds | toJson }}
{{- end }}

{{- if hasKey .Values.spanNormalizerConfig.processor "defaultTenantId" }}
defaultTenantId = "{{ .Values.spanNormalizerConfig.processor.defaultTenantId }}"
{{- end }}

{{- if hasKey .Values.spanNormalizerConfig.processor "spanDropCriterion" }}
spanDropCriterion = {{ .Values.spanNormalizerConfig.processor.spanDropCriterion | toJson }}
{{- end }}

{{- if hasKey .Values.spanNormalizerConfig.processor "spanDropFilters" }}
spanDropFilters = {{ .Values.spanNormalizerConfig.processor.spanDropFilters | toJson }}
{{- end }}

{{- if hasKey .Values.spanNormalizerConfig.processor "bypassKey" }}
bypass.key = "{{ .Values.spanNormalizerConfig.processor.bypassKey }}"
{{- end }}

{{- if hasKey .Values.spanNormalizerConfig.processor "lateArrivalThresholdDuration" }}
late.arrival.threshold.duration = "{{ .Values.spanNormalizerConfig.processor.lateArrivalThresholdDuration }}"
{{- end }}

{{- if hasKey .Values.spanNormalizerConfig.processor "allowedAttributesPrefixes" }}
allowed.attributes.prefixes = {{ .Values.spanNormalizerConfig.processor.allowedAttributesPrefixes | toJson }}
{{- end }}

{{- if hasKey .Values.spanNormalizerConfig.processor "prefixedMatchedAllowedAttributes" }}
prefixed.matched.allowed.attributes = {{ .Values.spanNormalizerConfig.processor.prefixedMatchedAllowedAttributes | toJson }}
{{- end }}

{{- if hasKey .Values.spanNormalizerConfig.processor "rootExitSpanDropCriterion" }}
rootExitSpanDropCriterion = {{ .Values.spanNormalizerConfig.processor.rootExitSpanDropCriterion | toJson }}
{{- end }}

{{- if hasKey .Values.spanNormalizerConfig.processor "excludeLogsTenantIds" }}
excludeLogsTenantIds = {{ .Values.spanNormalizerConfig.processor.excludeLogsTenantIds | toJson }}
{{- end }}
}
{{- end }}

{{- if hasKey .Values.spanNormalizerConfig "metrics" }}
metrics {
reporter {
names = {{- toJson .Values.spanNormalizerConfig.metrics.reporter.names | trim | nindent 12 }}
}
}
{{- end }}
clients {
config.service.config = {
host = {{ .Values.spanNormalizerConfig.configServiceHost }}
port = {{ .Values.spanNormalizerConfig.configServicePort }}
}
}
span.rules.exclude {
cache = {
refreshAfterWriteDuration = {{ .Values.spanNormalizerConfig.excludeSpanRulesConfig.cache.refreshAfterWriteDuration }}
expireAfterWriteDuration = {{ .Values.spanNormalizerConfig.excludeSpanRulesConfig.cache.expireAfterWriteDuration }}
}
}
{{- if hasKey .Values.spanNormalizerConfig "rateLimitConfig" }}
rate.limit.config = [
{{- range $k,$v := $.Values.spanNormalizerConfig.rateLimitConfig }}
{
tenantId = {{ $v.tenantId }}
groupingKey = {{ $v.groupingKey }}
maxSpansPerMinute = {{ $v.maxSpansPerMinute }}
},
{{- end }}
]
{{- end }}
16 changes: 16 additions & 0 deletions raw-spans-grouper/helm/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,15 @@ rawSpansGrouperConfig:
groupby:
internal: 30

spanNormalizerConfig:
name: span-normalizer-config
excludeSpanRulesConfig:
cache:
refreshAfterWriteDuration: 3m
expireAfterWriteDuration: 5m
rateLimitConfig: []


logConfig:
name: raw-spans-grouper-log-config
monitorInterval: 30
Expand Down Expand Up @@ -166,6 +175,13 @@ kafka-topic-creator:
partitions: 8
configs:
cleanup.policy: "[compact, delete]"
raw-logs:
replicationFactor: 3
partitions: 8
configs:
retention.bytes: 8589934592 # default = -1
retention.ms: 86400000 # default = 604800000 (7 days)
max.message.bytes: 1048588 # default = 1048588

zookeeper:
address: zookeeper:2181
Expand Down
15 changes: 15 additions & 0 deletions raw-spans-grouper/raw-spans-grouper/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ dependencies {
because("https://snyk.io/vuln/SNYK-JAVA-ORGGLASSFISHJERSEYCORE-1255637")
}
implementation(project(":span-normalizer:span-normalizer-api"))
implementation(project(":span-normalizer:raw-span-constants"))
implementation(project(":span-normalizer:span-normalizer-constants"))
implementation(project(":semantic-convention-utils"))
implementation("org.hypertrace.core.datamodel:data-model:0.1.27")
implementation("org.hypertrace.core.serviceframework:platform-service-framework:0.1.49")
implementation("org.hypertrace.core.serviceframework:platform-metrics:0.1.49")
Expand All @@ -41,6 +44,18 @@ dependencies {
implementation("org.hypertrace.core.kafkastreams.framework:weighted-group-partitioner:0.2.6")
implementation("de.javakaffee:kryo-serializers:0.45")
implementation("com.google.guava:guava:31.1-jre")
implementation("org.apache.commons:commons-lang3:3.12.0")

// Required for the GRPC clients.
runtimeOnly("io.grpc:grpc-netty:1.50.0")
annotationProcessor("org.projectlombok:lombok:1.18.18")
compileOnly("org.projectlombok:lombok:1.18.18")

implementation("org.hypertrace.config.service:span-processing-config-service-api:0.1.47")
implementation("org.hypertrace.config.service:span-processing-utils:0.1.47")

implementation("org.hypertrace.core.grpcutils:grpc-client-utils:0.11.2")
implementation("org.hypertrace.core.grpcutils:grpc-context-utils:0.11.2")

// Logging
implementation("org.slf4j:slf4j-api:1.7.30")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,18 @@
import static org.hypertrace.core.rawspansgrouper.RawSpanGrouperConstants.RAW_SPANS_GROUPER_JOB_CONFIG;
import static org.hypertrace.core.rawspansgrouper.RawSpanGrouperConstants.SPAN_STATE_STORE_NAME;
import static org.hypertrace.core.rawspansgrouper.RawSpanGrouperConstants.TRACE_STATE_STORE;
import static org.hypertrace.core.spannormalizer.constants.SpanNormalizerConstants.BYPASS_OUTPUT_TOPIC_CONFIG_KEY;
import static org.hypertrace.core.spannormalizer.constants.SpanNormalizerConstants.OUTPUT_TOPIC_RAW_LOGS_CONFIG_KEY;

import com.typesafe.config.Config;
import io.jaegertracing.api_v2.JaegerSpanInternalModel;
import java.util.List;
import java.util.Map;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.Named;
import org.apache.kafka.streams.kstream.Produced;
Expand All @@ -29,6 +34,13 @@
import org.hypertrace.core.spannormalizer.SpanIdentity;
import org.hypertrace.core.spannormalizer.TraceIdentity;
import org.hypertrace.core.spannormalizer.TraceState;
import org.hypertrace.core.spannormalizer.jaeger.JaegerSpanPreProcessor;
import org.hypertrace.core.spannormalizer.jaeger.JaegerSpanSerde;
import org.hypertrace.core.spannormalizer.jaeger.JaegerSpanToAvroRawSpanTransformer;
import org.hypertrace.core.spannormalizer.jaeger.JaegerSpanToLogRecordsTransformer;
import org.hypertrace.core.spannormalizer.jaeger.PreProcessedSpan;
import org.hypertrace.core.spannormalizer.rawspan.ByPassPredicate;
import org.hypertrace.core.spannormalizer.rawspan.RawSpanToStructuredTraceTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand All @@ -44,17 +56,19 @@ public StreamsBuilder buildTopology(
Map<String, Object> properties,
StreamsBuilder streamsBuilder,
Map<String, KStream<?, ?>> inputStreams) {

Config jobConfig = getJobConfig(properties);
String inputTopic = jobConfig.getString(INPUT_TOPIC_CONFIG_KEY);
String outputTopic = jobConfig.getString(OUTPUT_TOPIC_CONFIG_KEY);
String bypassOutputTopic = jobConfig.getString(BYPASS_OUTPUT_TOPIC_CONFIG_KEY);
String outputTopicRawLogs = jobConfig.getString(OUTPUT_TOPIC_RAW_LOGS_CONFIG_KEY);

KStream<TraceIdentity, RawSpan> inputStream =
(KStream<TraceIdentity, RawSpan>) inputStreams.get(inputTopic);
KStream<byte[], JaegerSpanInternalModel.Span> inputStream =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Needs to be an incremental change. We need to read from both jaeger-spans and raw-spans-from-jaeger-spans topics in this PR.

Only in follow up PR some time down the line, we can just read from jaeger-spans

(KStream<byte[], JaegerSpanInternalModel.Span>) inputStreams.get(inputTopic);
if (inputStream == null) {
inputStream =
streamsBuilder
// read the input topic
.stream(inputTopic);
streamsBuilder.stream(
inputTopic, Consumed.with(Serdes.ByteArray(), new JaegerSpanSerde()));
inputStreams.put(inputTopic, inputStream);
}

Expand All @@ -75,7 +89,34 @@ public StreamsBuilder buildTopology(
streamsBuilder.addStateStore(spanStoreBuilder);
streamsBuilder.addStateStore(traceStateStoreBuilder);

StreamPartitioner<TraceIdentity, StructuredTrace> groupPartitioner =
KStream<byte[], PreProcessedSpan> preProcessedStream =
inputStream.transform(() -> new JaegerSpanPreProcessor(getGrpcChannelRegistry()));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How are we ensuring that this jaeger spans topic is consumed exactly from the offset where the span-normalizer got decommissioned?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. Being different applications, it will be difficult to get them in sync.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My suggestion would be the following:
We shouldn't even be reading from the existing jaeger-spans topic. This topology should read from a different topic (say jaeger-spans-v2). On the producer side (i.e. hypertrace-collector), we should have a way to control whether to produce to the existing jaeger-spans topic or to the new jaeger-spans-v2. Once we make the flip to the v2 topic we can decommission span-normalizer.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably need to keep both applications for now. And we may need to move the raw-spans-grouper logic into span-normalizer instead of the other way around.


// logs output
preProcessedStream.transform(JaegerSpanToLogRecordsTransformer::new).to(outputTopicRawLogs);

KStream<TraceIdentity, RawSpan>[] branches =
preProcessedStream
.transform(JaegerSpanToAvroRawSpanTransformer::new)
.branch(new ByPassPredicate(jobConfig), (key, value) -> true);

KStream<TraceIdentity, RawSpan> bypassTopicBranch = branches[0];
KStream<TraceIdentity, RawSpan> outputTopicBranch = branches[1];

StreamPartitioner<String, StructuredTrace> tenantIsolationPartitionerForBypassTopic =
new GroupPartitionerBuilder<String, StructuredTrace>()
.buildPartitioner(
"spans",
jobConfig,
(traceid, span) -> traceid,
new KeyHashPartitioner<>(),
getGrpcChannelRegistry());

bypassTopicBranch
.transform(RawSpanToStructuredTraceTransformer::new)
.to(bypassOutputTopic, Produced.with(null, null, tenantIsolationPartitionerForBypassTopic));

StreamPartitioner<TraceIdentity, StructuredTrace> tenantIsolationPartitionerForOutputTopic =
new GroupPartitionerBuilder<TraceIdentity, StructuredTrace>()
.buildPartitioner(
"spans",
Expand All @@ -85,10 +126,10 @@ public StreamsBuilder buildTopology(
getGrpcChannelRegistry());

Produced<TraceIdentity, StructuredTrace> outputTopicProducer =
Produced.with(null, null, groupPartitioner);
Produced.with(null, null, tenantIsolationPartitionerForOutputTopic);
outputTopicProducer = outputTopicProducer.withName(OUTPUT_TOPIC_PRODUCER);

inputStream
outputTopicBranch
.transform(
RawSpansProcessor::new,
Named.as(RawSpansProcessor.class.getSimpleName()),
Expand Down
Loading