diff --git a/go.mod b/go.mod index 506d578db..32bffe49a 100644 --- a/go.mod +++ b/go.mod @@ -10,6 +10,8 @@ require ( github.com/google/uuid v1.6.0 github.com/spf13/cobra v1.9.1 github.com/stretchr/testify v1.10.0 + go.opentelemetry.io/otel v1.28.0 + go.opentelemetry.io/otel/trace v1.28.0 go.uber.org/mock v0.5.0 golang.org/x/sync v0.12.0 google.golang.org/protobuf v1.36.5 diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index 0347fa1c9..7744cc76f 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -23,6 +23,7 @@ import ( "github.com/argoproj/gitops-engine/pkg/sync" "github.com/argoproj/gitops-engine/pkg/sync/common" "github.com/argoproj/gitops-engine/pkg/utils/kube" + "github.com/argoproj/gitops-engine/pkg/utils/tracing" ) const ( @@ -84,7 +85,7 @@ func (e *gitOpsEngine) Sync(ctx context.Context, return nil, err } opts = append(opts, sync.WithSkipHooks(!diffRes.Modified)) - syncCtx, cleanup, err := sync.NewSyncContext(revision, result, e.config, e.config, e.kubectl, namespace, e.cache.GetOpenAPISchema(), opts...) + syncCtx, cleanup, err := sync.NewSyncContext(revision, result, e.config, e.config, e.kubectl, namespace, e.cache.GetOpenAPISchema(), tracing.NopTracer{}, "", "", opts...) if err != nil { return nil, err } diff --git a/pkg/sync/sync_context.go b/pkg/sync/sync_context.go index 3420a351c..2a94ec6ad 100644 --- a/pkg/sync/sync_context.go +++ b/pkg/sync/sync_context.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "sort" + "strconv" "strings" "sync" "time" @@ -32,6 +33,7 @@ import ( "github.com/argoproj/gitops-engine/pkg/sync/hook" resourceutil "github.com/argoproj/gitops-engine/pkg/sync/resource" kubeutil "github.com/argoproj/gitops-engine/pkg/utils/kube" + "github.com/argoproj/gitops-engine/pkg/utils/tracing" ) type reconciledResource struct { @@ -221,6 +223,8 @@ func NewSyncContext( kubectl kubeutil.Kubectl, namespace string, openAPISchema openapi.Resources, + syncTracer tracing.Tracer, + syncTraceID, syncTraceRootSpanID string, opts ...SyncOpt, ) (SyncContext, func(), error) { dynamicIf, err := dynamic.NewForConfig(restConfig) @@ -258,6 +262,9 @@ func NewSyncContext( permissionValidator: func(_ *unstructured.Unstructured, _ *metav1.APIResource) error { return nil }, + syncTracer: syncTracer, + syncTraceID: syncTraceID, + syncTraceRootSpanID: syncTraceRootSpanID, } for _, opt := range opts { opt(ctx) @@ -371,6 +378,11 @@ type syncContext struct { // lock to protect concurrent updates of the result list lock sync.Mutex + // tracer for tracing the sync operation + syncTraceID string + syncTraceRootSpanID string + syncTracer tracing.Tracer + // syncNamespace is a function that will determine if the managed // namespace should be synced syncNamespace func(*unstructured.Unstructured, *unstructured.Unstructured) (bool, error) @@ -1291,6 +1303,8 @@ func (sc *syncContext) runTasks(tasks syncTasks, dryRun bool) runState { ss.Go(func(state runState) runState { logCtx := sc.log.WithValues("dryRun", dryRun, "task", t) logCtx.V(1).Info("Pruning") + span := sc.createSpan("pruneObject", dryRun) + defer span.Finish() result, message := sc.pruneObject(t.liveObj, sc.prune, dryRun) if result == common.ResultCodeSyncFailed { state = failed @@ -1299,6 +1313,7 @@ func (sc *syncContext) runTasks(tasks syncTasks, dryRun bool) runState { if !dryRun || sc.dryRun || result == common.ResultCodeSyncFailed { sc.setResourceResult(t, result, operationPhases[result], message) } + sc.setBaggageItemForTasks(&span, t, message, result, operationPhases[result]) return state }) } @@ -1317,6 +1332,10 @@ func (sc *syncContext) runTasks(tasks syncTasks, dryRun bool) runState { t := task ss.Go(func(state runState) runState { sc.log.WithValues("dryRun", dryRun, "task", t).V(1).Info("Deleting") + span := sc.createSpan("hooksDeletion", dryRun) + defer span.Finish() + message := "deleted" + operationPhase := common.OperationRunning if !dryRun { err := sc.deleteResource(t) if err != nil { @@ -1324,13 +1343,18 @@ func (sc *syncContext) runTasks(tasks syncTasks, dryRun bool) runState { // delete is requested, we treat this as a nop if !apierrors.IsNotFound(err) { state = failed - sc.setResourceResult(t, "", common.OperationError, fmt.Sprintf("failed to delete resource: %v", err)) + message = fmt.Sprintf("failed to delete resource: %v", err) + operationPhase = common.OperationError + sc.setResourceResult(t, "", operationPhase, message) } } else { + message = "deleted(dry-run)" // if there is anything that needs deleting, we are at best now in pending and // want to return and wait for sync to be invoked again state = pending + operationPhase = common.OperationSucceeded } + sc.setBaggageItemForTasks(&span, t, message, "", operationPhase) } return state }) @@ -1359,6 +1383,29 @@ func (sc *syncContext) runTasks(tasks syncTasks, dryRun bool) runState { return state } +func (sc *syncContext) createSpan(operation string, dryrun bool) tracing.Span { + // NOTE: we use context.Background() here because we don't want to inherit any trace context from the parent + var span tracing.Span + ctx := context.Background() + if sc.syncTracer == nil { + span = tracing.NopTracer{}.StartSpan(ctx, operation) + } else { + span = sc.syncTracer.StartSpanFromTraceParent(ctx, operation, sc.syncTraceID, sc.syncTraceRootSpanID) + } + span.SetBaggageItem("dryrun", strconv.FormatBool(dryrun)) + return span +} + +func (sc *syncContext) setBaggageItemForTasks(span *tracing.Span, t *syncTask, message string, result common.ResultCode, operationPhase common.OperationPhase) { + resourceKey := t.resourceKey() + (*span).SetBaggageItem("resource", resourceKey.String()) + (*span).SetBaggageItem("result", string(result)) + (*span).SetBaggageItem("operationPhase", string(operationPhase)) + (*span).SetBaggageItem("message", message) + (*span).SetBaggageItem("phase", string(t.phase)) + (*span).SetBaggageItem("wave", strconv.Itoa(t.wave())) +} + func (sc *syncContext) processCreateTasks(state runState, tasks syncTasks, dryRun bool) runState { ss := newStateSync(state) for _, task := range tasks { @@ -1370,13 +1417,16 @@ func (sc *syncContext) processCreateTasks(state runState, tasks syncTasks, dryRu logCtx := sc.log.WithValues("dryRun", dryRun, "task", t) logCtx.V(1).Info("Applying") validate := sc.validate && !resourceutil.HasAnnotationOption(t.targetObj, common.AnnotationSyncOptions, common.SyncOptionsDisableValidation) + span := sc.createSpan("applyObject", dryRun) + defer span.Finish() result, message := sc.applyObject(t, dryRun, validate) if result == common.ResultCodeSyncFailed { logCtx.WithValues("message", message).Info("Apply failed") state = failed } + var phase common.OperationPhase if !dryRun || sc.dryRun || result == common.ResultCodeSyncFailed { - phase := operationPhases[result] + phase = operationPhases[result] // no resources are created in dry-run, so running phase means validation was // successful and sync operation succeeded if sc.dryRun && phase == common.OperationRunning { @@ -1384,6 +1434,7 @@ func (sc *syncContext) processCreateTasks(state runState, tasks syncTasks, dryRu } sc.setResourceResult(t, result, phase, message) } + sc.setBaggageItemForTasks(&span, t, message, result, phase) return state }) } diff --git a/pkg/utils/kube/ctl.go b/pkg/utils/kube/ctl.go index 8af205ad4..8c5ac364b 100644 --- a/pkg/utils/kube/ctl.go +++ b/pkg/utils/kube/ctl.go @@ -163,7 +163,7 @@ func (k *KubectlCmd) newGVKParser(oapiGetter discovery.OpenAPISchemaInterface) ( } func (k *KubectlCmd) GetAPIResources(config *rest.Config, preferred bool, resourceFilter ResourceFilter) ([]APIResourceInfo, error) { - span := k.Tracer.StartSpan("GetAPIResources") + span := k.Tracer.StartSpan(context.Background(), "GetAPIResources") defer span.Finish() apiResIfs, err := k.filterAPIResources(config, preferred, resourceFilter, func(apiResource *metav1.APIResource) bool { return isSupportedVerb(apiResource, listVerb) && isSupportedVerb(apiResource, watchVerb) @@ -176,7 +176,7 @@ func (k *KubectlCmd) GetAPIResources(config *rest.Config, preferred bool, resour // GetResource returns resource func (k *KubectlCmd) GetResource(ctx context.Context, config *rest.Config, gvk schema.GroupVersionKind, name string, namespace string) (*unstructured.Unstructured, error) { - span := k.Tracer.StartSpan("GetResource") + span := k.Tracer.StartSpan(context.Background(), "GetResource") span.SetBaggageItem("kind", gvk.Kind) span.SetBaggageItem("name", name) defer span.Finish() @@ -199,7 +199,7 @@ func (k *KubectlCmd) GetResource(ctx context.Context, config *rest.Config, gvk s // CreateResource creates resource func (k *KubectlCmd) CreateResource(ctx context.Context, config *rest.Config, gvk schema.GroupVersionKind, name string, namespace string, obj *unstructured.Unstructured, createOptions metav1.CreateOptions, subresources ...string) (*unstructured.Unstructured, error) { - span := k.Tracer.StartSpan("CreateResource") + span := k.Tracer.StartSpan(context.Background(), "CreateResource") span.SetBaggageItem("kind", gvk.Kind) span.SetBaggageItem("name", name) defer span.Finish() @@ -222,7 +222,7 @@ func (k *KubectlCmd) CreateResource(ctx context.Context, config *rest.Config, gv // PatchResource patches resource func (k *KubectlCmd) PatchResource(ctx context.Context, config *rest.Config, gvk schema.GroupVersionKind, name string, namespace string, patchType types.PatchType, patchBytes []byte, subresources ...string) (*unstructured.Unstructured, error) { - span := k.Tracer.StartSpan("PatchResource") + span := k.Tracer.StartSpan(context.Background(), "PatchResource") span.SetBaggageItem("kind", gvk.Kind) span.SetBaggageItem("name", name) defer span.Finish() @@ -245,7 +245,7 @@ func (k *KubectlCmd) PatchResource(ctx context.Context, config *rest.Config, gvk // DeleteResource deletes resource func (k *KubectlCmd) DeleteResource(ctx context.Context, config *rest.Config, gvk schema.GroupVersionKind, name string, namespace string, deleteOptions metav1.DeleteOptions) error { - span := k.Tracer.StartSpan("DeleteResource") + span := k.Tracer.StartSpan(context.Background(), "DeleteResource") span.SetBaggageItem("kind", gvk.Kind) span.SetBaggageItem("name", name) defer span.Finish() @@ -323,7 +323,7 @@ func ManageServerSideDiffDryRuns(config *rest.Config, openAPISchema openapi.Reso // ConvertToVersion converts an unstructured object into the specified group/version func (k *KubectlCmd) ConvertToVersion(obj *unstructured.Unstructured, group string, version string) (*unstructured.Unstructured, error) { - span := k.Tracer.StartSpan("ConvertToVersion") + span := k.Tracer.StartSpan(context.Background(), "ConvertToVersion") from := obj.GroupVersionKind().GroupVersion() span.SetBaggageItem("from", from.String()) span.SetBaggageItem("to", schema.GroupVersion{Group: group, Version: version}.String()) @@ -335,7 +335,7 @@ func (k *KubectlCmd) ConvertToVersion(obj *unstructured.Unstructured, group stri } func (k *KubectlCmd) GetServerVersion(config *rest.Config) (string, error) { - span := k.Tracer.StartSpan("GetServerVersion") + span := k.Tracer.StartSpan(context.Background(), "GetServerVersion") defer span.Finish() client, err := discovery.NewDiscoveryClientForConfig(config) if err != nil { diff --git a/pkg/utils/kube/resource_ops.go b/pkg/utils/kube/resource_ops.go index 039749ac3..5427dec4d 100644 --- a/pkg/utils/kube/resource_ops.go +++ b/pkg/utils/kube/resource_ops.go @@ -221,7 +221,7 @@ func kubeCmdFactory(kubeconfig, ns string, config *rest.Config) cmdutil.Factory } func (k *kubectlResourceOperations) ReplaceResource(ctx context.Context, obj *unstructured.Unstructured, dryRunStrategy cmdutil.DryRunStrategy, force bool) (string, error) { - span := k.tracer.StartSpan("ReplaceResource") + span := k.tracer.StartSpan(context.Background(), "ReplaceResource") span.SetBaggageItem("kind", obj.GetKind()) span.SetBaggageItem("name", obj.GetName()) defer span.Finish() @@ -243,7 +243,7 @@ func (k *kubectlResourceOperations) ReplaceResource(ctx context.Context, obj *un func (k *kubectlResourceOperations) CreateResource(ctx context.Context, obj *unstructured.Unstructured, dryRunStrategy cmdutil.DryRunStrategy, validate bool) (string, error) { gvk := obj.GroupVersionKind() - span := k.tracer.StartSpan("CreateResource") + span := k.tracer.StartSpan(context.Background(), "CreateResource") span.SetBaggageItem("kind", gvk.Kind) span.SetBaggageItem("name", obj.GetName()) defer span.Finish() @@ -273,7 +273,7 @@ func (k *kubectlResourceOperations) CreateResource(ctx context.Context, obj *uns func (k *kubectlResourceOperations) UpdateResource(ctx context.Context, obj *unstructured.Unstructured, dryRunStrategy cmdutil.DryRunStrategy) (*unstructured.Unstructured, error) { gvk := obj.GroupVersionKind() - span := k.tracer.StartSpan("UpdateResource") + span := k.tracer.StartSpan(context.Background(), "UpdateResource") span.SetBaggageItem("kind", gvk.Kind) span.SetBaggageItem("name", obj.GetName()) defer span.Finish() @@ -302,7 +302,7 @@ func (k *kubectlResourceOperations) UpdateResource(ctx context.Context, obj *uns // ApplyResource performs an apply of a unstructured resource func (k *kubectlServerSideDiffDryRunApplier) ApplyResource(_ context.Context, obj *unstructured.Unstructured, dryRunStrategy cmdutil.DryRunStrategy, force bool, validate bool, serverSideApply bool, manager string) (string, error) { - span := k.tracer.StartSpan("ApplyResource") + span := k.tracer.StartSpan(context.Background(), "ApplyResource") span.SetBaggageItem("kind", obj.GetKind()) span.SetBaggageItem("name", obj.GetName()) defer span.Finish() @@ -328,7 +328,7 @@ func (k *kubectlServerSideDiffDryRunApplier) ApplyResource(_ context.Context, ob // ApplyResource performs an apply of a unstructured resource func (k *kubectlResourceOperations) ApplyResource(ctx context.Context, obj *unstructured.Unstructured, dryRunStrategy cmdutil.DryRunStrategy, force, validate, serverSideApply bool, manager string) (string, error) { - span := k.tracer.StartSpan("ApplyResource") + span := k.tracer.StartSpan(context.Background(), "ApplyResource") span.SetBaggageItem("kind", obj.GetKind()) span.SetBaggageItem("name", obj.GetName()) defer span.Finish() diff --git a/pkg/utils/tracing/api.go b/pkg/utils/tracing/api.go index 89670a67b..7585e2db7 100644 --- a/pkg/utils/tracing/api.go +++ b/pkg/utils/tracing/api.go @@ -1,5 +1,7 @@ package tracing +import "context" + /* Poor Mans OpenTracing. @@ -7,10 +9,13 @@ package tracing */ type Tracer interface { - StartSpan(operationName string) Span + StartSpan(_ context.Context, operationName string) Span + StartSpanFromTraceParent(ctx context.Context, operationName string, parentTraceId, parentSpanId string) Span } type Span interface { SetBaggageItem(key string, value any) Finish() + SpanID() string + TraceID() string } diff --git a/pkg/utils/tracing/logging.go b/pkg/utils/tracing/logging.go index fd0619f99..779ae65c3 100644 --- a/pkg/utils/tracing/logging.go +++ b/pkg/utils/tracing/logging.go @@ -1,6 +1,7 @@ package tracing import ( + "context" "time" "github.com/go-logr/logr" @@ -21,7 +22,7 @@ func NewLoggingTracer(logger logr.Logger) *LoggingTracer { } } -func (l LoggingTracer) StartSpan(operationName string) Span { +func (l LoggingTracer) StartSpan(_ context.Context, operationName string) Span { return loggingSpan{ logger: l.logger, operationName: operationName, @@ -30,6 +31,12 @@ func (l LoggingTracer) StartSpan(operationName string) Span { } } +// loggingSpan is not a real distributed tracing system. +// so no need to implement real StartSpanFromTraceParent method. +func (l LoggingTracer) StartSpanFromTraceParent(ctx context.Context, operationName string, _, _ string) Span { + return l.StartSpan(ctx, operationName) +} + type loggingSpan struct { logger logr.Logger operationName string @@ -54,3 +61,11 @@ func baggageToVals(baggage map[string]any) []any { } return result } + +func (s loggingSpan) TraceID() string { + return "" +} + +func (s loggingSpan) SpanID() string { + return "" +} diff --git a/pkg/utils/tracing/logging_test.go b/pkg/utils/tracing/logging_test.go index f9346550f..c16b91fb7 100644 --- a/pkg/utils/tracing/logging_test.go +++ b/pkg/utils/tracing/logging_test.go @@ -1,6 +1,7 @@ package tracing import ( + "context" "testing" "github.com/go-logr/logr" @@ -22,7 +23,7 @@ func TestLoggingTracer(t *testing.T) { tr := NewLoggingTracer(logr.New(l)) - span := tr.StartSpan("my-operation") + span := tr.StartSpan(context.Background(), "my-operation") span.SetBaggageItem("my-key", "my-value") span.Finish() } diff --git a/pkg/utils/tracing/nop.go b/pkg/utils/tracing/nop.go index e39b67b99..7a28567ac 100644 --- a/pkg/utils/tracing/nop.go +++ b/pkg/utils/tracing/nop.go @@ -1,5 +1,7 @@ package tracing +import "context" + var ( _ Tracer = NopTracer{} _ Span = nopSpan{} @@ -7,7 +9,11 @@ var ( type NopTracer struct{} -func (n NopTracer) StartSpan(_ string) Span { +func (n NopTracer) StartSpan(_ context.Context, _ string) Span { + return nopSpan{} +} + +func (n NopTracer) StartSpanFromTraceParent(_ context.Context, _, _, _ string) Span { return nopSpan{} } @@ -18,3 +24,11 @@ func (n nopSpan) SetBaggageItem(_ string, _ any) { func (n nopSpan) Finish() { } + +func (n nopSpan) TraceID() string { + return "" +} + +func (n nopSpan) SpanID() string { + return "" +} diff --git a/pkg/utils/tracing/opentelemetry.go b/pkg/utils/tracing/opentelemetry.go new file mode 100644 index 000000000..c4ab5a2d3 --- /dev/null +++ b/pkg/utils/tracing/opentelemetry.go @@ -0,0 +1,56 @@ +package tracing + +import ( + "context" + + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" +) + +type OpenTelemetryTracer struct { + realTracer trace.Tracer +} + +func NewOpenTelemetryTracer(t trace.Tracer) Tracer { + return &OpenTelemetryTracer{ + realTracer: t, + } +} + +func (t OpenTelemetryTracer) StartSpan(ctx context.Context, operationName string) Span { + _, realspan := t.realTracer.Start(ctx, operationName) + return openTelemetrySpan{realSpan: realspan} +} + +func (t OpenTelemetryTracer) StartSpanFromTraceParent(ctx context.Context, operationName string, parentTraceId, parentSpanId string) Span { + traceID, _ := trace.TraceIDFromHex(parentTraceId) + parentSpanID, _ := trace.SpanIDFromHex(parentSpanId) + spanCtx := trace.NewSpanContext(trace.SpanContextConfig{ + TraceID: traceID, + SpanID: parentSpanID, + TraceFlags: trace.FlagsSampled, + }) + ctx = trace.ContextWithSpanContext(ctx, spanCtx) + _, realSpan := t.realTracer.Start(ctx, operationName) + return openTelemetrySpan{realSpan: realSpan} +} + +type openTelemetrySpan struct { + realSpan trace.Span +} + +func (s openTelemetrySpan) SetBaggageItem(key string, value any) { + s.realSpan.SetAttributes(attribute.Key(key).String(value.(string))) +} + +func (s openTelemetrySpan) Finish() { + s.realSpan.End() +} + +func (s openTelemetrySpan) TraceID() string { + return s.realSpan.SpanContext().TraceID().String() +} + +func (s openTelemetrySpan) SpanID() string { + return s.realSpan.SpanContext().SpanID().String() +}