Skip to content
This repository was archived by the owner on Jan 21, 2025. It is now read-only.

Commit

Permalink
reporter: attach thread ID to emitted profiles (#20)
Browse files Browse the repository at this point in the history
get thread ID from bpf_get_current_uid_gid (based on
https://man7.org/linux/man-pages/man7/bpf-helpers.7.html)

and pass it down through the trace to the reporter. attach it to
the emitted pprof as a label
  • Loading branch information
Gandem committed Jun 28, 2024
1 parent e4fe4cd commit 495b053
Show file tree
Hide file tree
Showing 13 changed files with 29 additions and 6 deletions.
1 change: 1 addition & 0 deletions host/host.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ type Trace struct {
Hash TraceHash
KTime util.KTime
PID util.PID
TID util.TID
APMTraceID libpf.APMTraceID
APMTransactionID libpf.APMTransactionID
}
1 change: 1 addition & 0 deletions libpf/libpf.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ type TraceAndCounts struct {
ContainerName string
APMServiceName string
PID util.PID
TID util.TID
}

type FrameMetadata struct {
Expand Down
11 changes: 10 additions & 1 deletion reporter/datadog_reporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ type DatadogReporter struct {

// ReportTraceEvent enqueues reported trace events for the Datadog reporter.
func (r *DatadogReporter) ReportTraceEvent(trace *libpf.Trace, timestamp libpf.UnixTime64,
comm, podName, containerID, containerName, apmServiceName string, pid util.PID) {
comm, podName, containerID, containerName, apmServiceName string,
pid util.PID, tid util.TID) {
traceEvents := r.traceEvents.WLock()
defer r.traceEvents.WUnlock(&traceEvents)

Expand All @@ -95,6 +96,7 @@ func (r *DatadogReporter) ReportTraceEvent(trace *libpf.Trace, timestamp libpf.U
containerName: containerName,
apmServiceName: apmServiceName,
pid: pid,
tid: tid,
timestamps: []uint64{uint64(timestamp)},
}
}
Expand Down Expand Up @@ -567,6 +569,13 @@ func addTraceLabels(labels map[string][]string, i traceFramesCounts) {
if i.pid != 0 {
labels["process_id"] = append(labels["process_id"], fmt.Sprintf("%d", i.pid))
}

if i.tid != 0 {
// The naming has an impact on the backend side,
// this is why we use "thread id" instead of "thread_id"
// This is also consistent with ddprof.
labels["thread id"] = append(labels["thread id"], fmt.Sprintf("%d", i.tid))
}
}

// getDummyMappingIndex inserts or looks up a dummy entry for interpreted FileIDs.
Expand Down
3 changes: 2 additions & 1 deletion reporter/iface.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ type TraceReporter interface {
// and caches it for reporting to the backend. It returns true if the event was
// enqueued for reporting, and false if the event was ignored.
ReportTraceEvent(trace *libpf.Trace, timestamp libpf.UnixTime64,
comm, podName, containerID, containerName, apmServiceName string, pid util.PID)
comm, podName, containerID, containerName, apmServiceName string,
pid util.PID, tid util.TID)

// SupportsReportTraceEvent returns true if the reporter supports reporting trace events
// via ReportTraceEvent().
Expand Down
5 changes: 4 additions & 1 deletion reporter/otlp_reporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ type traceFramesCounts struct {
containerName string
apmServiceName string
pid util.PID
tid util.TID
timestamps []uint64 // in nanoseconds
}

Expand Down Expand Up @@ -125,7 +126,7 @@ func (r *OTLPReporter) SupportsReportTraceEvent() bool { return true }
// ReportTraceEvent enqueues reported trace events for the OTLP reporter.
func (r *OTLPReporter) ReportTraceEvent(trace *libpf.Trace,
timestamp libpf.UnixTime64, comm, podName, containerID,
containerName, apmServiceName string, pid util.PID) {
containerName, apmServiceName string, pid util.PID, tid util.TID) {
traceEvents := r.traceEvents.WLock()
defer r.traceEvents.WUnlock(&traceEvents)

Expand All @@ -145,6 +146,7 @@ func (r *OTLPReporter) ReportTraceEvent(trace *libpf.Trace,
containerName: containerName,
apmServiceName: apmServiceName,
pid: pid,
tid: tid,
timestamps: []uint64{uint64(timestamp)},
}
}
Expand Down Expand Up @@ -708,6 +710,7 @@ func getSampleAttributes(profile *profiles.Profile, i traceFramesCounts) []uint6
addAttr(semconv.ThreadNameKey, i.comm)
addAttr(semconv.ServiceNameKey, i.apmServiceName)
addAttr("process_id", strconv.Itoa(int(i.pid)))
addAttr(semconv.ThreadIDKey, strconv.Itoa(int(i.tid)))

return indices
}
Expand Down
2 changes: 2 additions & 0 deletions support/ebpf/native_stack_trace.ebpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -805,6 +805,7 @@ static inline
int collect_trace(struct pt_regs *ctx) {
// Get the PID and TGID register.
u64 id = bpf_get_current_pid_tgid();
u64 tid = id & 0xFFFFFFFF;
u64 pid = id >> 32;

if (pid == 0) {
Expand All @@ -824,6 +825,7 @@ int collect_trace(struct pt_regs *ctx) {
}

Trace *trace = &record->trace;
trace->tid = tid;
trace->pid = pid;
trace->ktime = ktime;
if (bpf_get_current_comm(&(trace->comm), sizeof(trace->comm)) < 0) {
Expand Down
Binary file modified support/ebpf/tracer.ebpf.arm64
Binary file not shown.
Binary file modified support/ebpf/tracer.ebpf.x86
Binary file not shown.
2 changes: 2 additions & 0 deletions support/ebpf/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,8 @@ typedef struct __attribute__((packed)) ApmCorrelationBuf {
typedef struct Trace {
// The process ID
u32 pid;
// The thread ID
u32 tid;
// Monotonic kernel time in nanosecond precision.
u64 ktime;
// The current COMM of the thread of this Trace.
Expand Down
2 changes: 1 addition & 1 deletion tracehandler/tracehandler.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ func (m *traceHandler) HandleTrace(bpfTrace *host.Trace) {
if m.reporter.SupportsReportTraceEvent() {
m.reporter.ReportTraceEvent(umTrace, timestamp,
bpfTrace.Comm, meta.PodName, meta.ContainerID, meta.ContainerName,
svcName, bpfTrace.PID)
svcName, bpfTrace.PID, bpfTrace.TID)
return
}
m.reporter.ReportCountForTrace(umTrace.Hash, timestamp, 1,
Expand Down
2 changes: 1 addition & 1 deletion tracehandler/tracehandler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ func (m *mockReporter) ReportCountForTrace(traceHash libpf.TraceHash,
func (m *mockReporter) SupportsReportTraceEvent() bool { return false }

func (m *mockReporter) ReportTraceEvent(_ *libpf.Trace,
_ libpf.UnixTime64, _, _, _, _, _ string, _ util.PID) {
_ libpf.UnixTime64, _, _, _, _, _ string, _ util.PID, _ util.TID) {
}

type mockContainerMetadataHandler struct{}
Expand Down
3 changes: 2 additions & 1 deletion tracer/tracer.go
Original file line number Diff line number Diff line change
Expand Up @@ -853,13 +853,14 @@ func (t *Tracer) loadBpfTrace(raw []byte) *host.Trace {
APMTraceID: *(*libpf.APMTraceID)(unsafe.Pointer(&ptr.apm_trace_id)),
APMTransactionID: *(*libpf.APMTransactionID)(unsafe.Pointer(&ptr.apm_transaction_id)),
PID: util.PID(ptr.pid),
TID: util.TID(ptr.tid),
KTime: util.KTime(ptr.ktime),
}

// Trace fields included in the hash:
// - PID, kernel stack ID, length & frame array
// Intentionally excluded:
// - ktime, COMM, APM trace, APM transaction ID
// - ktime, COMM, TID, APM trace, APM transaction ID
ptr.comm = [16]C.char{}
ptr.apm_trace_id = C.ApmTraceID{}
ptr.apm_transaction_id = C.ApmSpanID{}
Expand Down
3 changes: 3 additions & 0 deletions util/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ func (p PID) Hash32() uint32 {
return uint32(p)
}

// TID represent a thread ID
type TID int32

// HashString turns a string into a 64-bit hash.
func HashString(s string) uint64 {
h := fnv.New64a()
Expand Down

0 comments on commit 495b053

Please sign in to comment.