Skip to content

Commit

Permalink
Add Rust native symbolization library and C wrapper
Browse files Browse the repository at this point in the history
Co-authored-by: Joel Höner <[email protected]>
Co-authored-by: Victor Michel <[email protected]>
Co-authored-by: Florian Lehner <[email protected]>
Co-authored-by: Tim Rühsen <[email protected]>
Co-authored-by: Davide Girardi <[email protected]>
  • Loading branch information
6 people committed Dec 6, 2024
1 parent 84cce0a commit 74bd1c1
Show file tree
Hide file tree
Showing 61 changed files with 9,834 additions and 7 deletions.
686 changes: 686 additions & 0 deletions Cargo.lock

Large diffs are not rendered by default.

85 changes: 85 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# Rust workspace. Allows commands like `cargo test` to work anywhere within the
# repo and ensures that all components use the same dependency versions
# (global Cargo.lock).

[workspace]
members = [
"rust-crates/symblib",
"rust-crates/symblib-capi",
]
resolver = "2"

[workspace.package]
version = "0.0.0"
rust-version = "1.77"

[profile.release]
lto = "thin"
codegen-units = 1
panic = "abort"
opt-level = 3
strip = "debuginfo"

[profile.release-unstripped]
inherits = "release"
strip = false
debug = 1

[profile.release-with-asserts]
inherits = "release-unstripped"
overflow-checks = true
debug-assertions = true

[profile.test]
opt-level = 1 # default of 0 is annoyingly slow

[workspace.dependencies]
anyhow = "1"
argh = "0.1"
base64 = "0.22.0"
cpp_demangle = "0.4"
fallible-iterator = "0.3"
flate2 = "1"
memmap2 = "0.9.0"
native-tls = "0.2"
prost = "0.12.1"
prost-build = "0.12.1"
rustc-demangle = "0.1"
serde_json = "1"
sha2 = "0.10"
tempfile = "3"
thiserror = "1"
zstd = "0.13.0"
zydis = "4.1.1"

[workspace.dependencies.gimli]
version = "0.30.0"
default-features = false
features = ["std", "endian-reader", "fallible-iterator"]

[workspace.dependencies.intervaltree]
version = "0.2"
default-features = false
features = ["std"]

[workspace.dependencies.lru]
version = "0.12.0"
default-features = false

[workspace.dependencies.object]
version = "0.36.0"
default-features = false
features = ["std", "read_core", "elf", "macho", "unaligned"]

[workspace.dependencies.serde]
version = "1"
features = ["derive"]

[workspace.dependencies.smallvec]
version = "1"
features = ["const_new", "union", "const_generics", "write"]

[workspace.dependencies.ureq]
version = "2"
default-features = false
features = ["gzip", "native-tls", "native-certs"]
69 changes: 69 additions & 0 deletions rust-crates/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
Rust components
===============

This directory contains the Rust components for symbolization of native traces.
They are built using the `cargo` build system. Please refer to the README
documents in the subdirectories for details.

## Source code documentation

> [!TIP]
>
> If you're trying to familiarize yourself with the codebase, reading the
> generated documentation is highly recommended. All the important documentation
> and `README`s are included in the rustdoc-built documentation, and the
> generated docs are much more structured than what you'd get by just browsing
> through the repository.

The source code is extensively documented with `rustdoc`, which is invoked
through cargo.

```bash
# Build documentation for our Rust crates and open it in a browser window
cargo doc --document-private-items --workspace --open
```

By default, this will open the documentation for `symblib`.

## Import style

Whenever the name of a type or function that is being imported isn't necessarily
unique, we instead import the module that contains it and then use the module
name to qualify the access. This is essentially similar to how things are done
in Golang.

If the item being imported has a very significant, unique name within the
codebase, it's also acceptable to import (`use`) that type directly and refer to
it without additional qualification.

<details>
<summary>Examples</summary>

There are many different modules that expose `File` and `Range` types. Import
the module instead and qualify the items with `module::item`.

```rust
use std::fs;
use symblib::objfile;

let a: fs::File = todo!();
let b: objfile::File = todo!();
```

```rust
use std::ops;
use symblib::symbfile;

let a: ops::Range<u64> = todo!();
let b: symbfile::Range = todo!();
```

`GoRuntimeInfo` is a unique name that is unlikely to cause confusion even
without further qualification. Import the item directly.

```rust
use symblib::gosym::GoRuntimeInfo;

let a: GoRuntimeInfo<'static> = todo!();
```

</details>
12 changes: 5 additions & 7 deletions doc/symb-proto/README.md → rust-crates/symb-proto/README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
Elastic symbolization protocol
==============================

## `symbfile` format
symbfile format
===============

`symbfile` is our custom file format for efficiently storing large amounts of
symbol information. A symbfile is a concatenation of length- and message-type
Expand All @@ -20,7 +18,7 @@ We currently use two different symbol information representations:
given address, the user would sweep through the whole symbfile and collect all
ranges that contain the desired address and then order the resulting range
records by their `depth` field. This presents the ground truth for symbol
information.
information.
- **Return pad records ([`ReturnPadV1`])**\
These map a single address to the symbols of a full inline trace. We generate
such records for each instruction following a `call`. The idea here is that
Expand All @@ -31,7 +29,7 @@ We currently use two different symbol information representations:

While the symbfile format would generally also allow mixing both record types
into a single file, we currently always generate a separate symbfile per record
kind.
kind.

More details about the format itself can be found in the documentation comments
of the [protobuf definition][symbfile-proto].
Expand Down Expand Up @@ -95,4 +93,4 @@ explains the failure in greater detail, for example:

`uuid` allows logically connecting user reports and logs: error reports from
the user that contain the UUID allow finding the logs needed for
investigation and debugging.
investigation and debugging.
File renamed without changes.
2 changes: 2 additions & 0 deletions rust-crates/symblib-capi/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
c/demo
go/go
14 changes: 14 additions & 0 deletions rust-crates/symblib-capi/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
[package]
name = "symblib-capi"
edition = "2021"
version.workspace = true
rust-version.workspace = true

[lib]
crate-type = ["staticlib", "cdylib"]

[dependencies]
symblib.path = "../symblib"

fallible-iterator.workspace = true
thiserror.workspace = true
4 changes: 4 additions & 0 deletions rust-crates/symblib-capi/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
symblib C API
=============

This crate exposes the public core API of symblib as a C library.
19 changes: 19 additions & 0 deletions rust-crates/symblib-capi/c/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Builds and runs the C demo program against the symblib C API library.
.PHONY: all clean run-demo

# Root of the Rust workspace (location of the top-level Cargo.toml),
# relative to this directory.
RUST_WORKSPACE_DIR = ../../..
# Cargo output directory for release builds.
TARGET_DIR = $(RUST_WORKSPACE_DIR)/target/release

all: demo

# Rebuild the library whenever one of the C API crate's Rust sources changes.
$(TARGET_DIR)/libsymblib_capi.so: ../src/*.rs
	cargo build --release --manifest-path $(RUST_WORKSPACE_DIR)/Cargo.toml

demo: symblib.h demo.c $(TARGET_DIR)/libsymblib_capi.so
	cc -g -I. -o $@ demo.c -L$(TARGET_DIR) -lsymblib_capi -ldl

# LD_LIBRARY_PATH lets the dynamic loader find libsymblib_capi.so at runtime.
run-demo: demo
	LD_LIBRARY_PATH=$(TARGET_DIR) ./demo

# Uses the same manifest as the build rule above. The previously referenced
# RUST_CRATE_DIR variable was never defined and expanded to an empty string.
clean:
	cargo clean --manifest-path $(RUST_WORKSPACE_DIR)/Cargo.toml
	rm -f demo
96 changes: 96 additions & 0 deletions rust-crates/symblib-capi/c/demo.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <inttypes.h>
#include "symblib.h"

// Example visitor callback for processing return pads
SymblibStatus retpad_visitor(void* user_data, const SymblibReturnPad* ret_pad) {
printf("\nReturn pad at ELF VA: 0x%08" PRIx64 "\n", ret_pad->elf_va);

// Iterate over each entry in the SymblibReturnPad
for (size_t i = 0; i < ret_pad->entries.len; ++i) {
SymblibReturnPadEntry* entry = &((SymblibReturnPadEntry*)ret_pad->entries.data)[i];
printf("\tEntry %zu:\n", i);
printf("\t\tFunction: %s\n", entry->func ? entry->func : "(null)");
printf("\t\tFile: %s\n", entry->file ? entry->file : "(null)");
printf("\t\tLine: %u\n", entry->line);
}

return 0;
}

// Example visitor callback for processing ranges.
//
// Prints the fields of `range` to stdout and then submits the range to the
// return pad extractor passed in via `user_data`, using `retpad_visitor` as the
// callback for any return pads.
//
// Returns 0 on success, or the error status reported by
// `symblib_retpadextr_submit` if submitting the range fails.
SymblibStatus range_visitor(void* user_data, const SymblibRange* range) {
    // The zero-padding width must sit inside the conversion ("%08" PRIx64);
    // writing "0x08%" would print a literal "08" in front of every address.
    printf("\nSymbol range at ELF VA: 0x%08" PRIx64 "\n", range->elf_va);
    // Guard every string field: passing NULL to %s is undefined behavior.
    printf("\tFunction: %s\n", range->func ? range->func : "(null)");
    printf("\tFile: %s\n", range->file ? range->file : "(null)");
    printf("\tCall File: %s\n", range->call_file ? range->call_file : "(null)");
    printf("\tCall Line: %u\n", range->call_line);
    printf("\tDepth: %u\n", range->depth);
    printf("\tLine Table Length: %zu\n", range->line_table.len);

    // Submit the range to the return pad extractor.
    SymblibStatus err = symblib_retpadextr_submit(
        (SymblibRetPadExtractor*)user_data, range, retpad_visitor, NULL);
    if (err != SYMBLIB_OK) {
        fprintf(stderr, "Failed to submit range for extraction\n");
        return err;
    }

    return 0;
}

// Demo entry point.
//
// Runs range extraction on an executable (argv[1] if given, otherwise this
// binary itself), printing every range and the return pads produced from the
// submitted ranges.
//
// Returns EXIT_SUCCESS on success and EXIT_FAILURE on any error.
int main(int argc, const char** argv) {
    const char* executable;

    switch (argc) {
    case 0:
        // Not even argv[0] is available, so there is no file to inspect.
        return EXIT_FAILURE;
    case 1:
        // Use this binary.
        executable = argv[0];
        break;
    default:
        // Use user-passed file.
        executable = argv[1];
        break;
    }

    printf("Starting range extraction for executable: %s\n", executable);

    // Initialize the global return pad extractor.
    // We use it in the range extractor visitor.
    SymblibRetPadExtractor* extr = NULL;
    SymblibStatus err = symblib_retpadextr_new(executable, &extr);
    if (err != SYMBLIB_OK) {
        fprintf(stderr, "Failed to create global SymblibRetPadExtractor\n");
        return EXIT_FAILURE;
    }
    assert(extr != NULL);

    // Call the range extraction function with our visitor.
    err = symblib_rangeextr(executable, false, range_visitor, extr);
    if (err != SYMBLIB_OK) {
        fprintf(stderr, "Error during range extraction: %d\n", err);
        symblib_retpadextr_free(extr);
        return EXIT_FAILURE;
    }

    // Notify the return pad extractor that we're done.
    err = symblib_retpadextr_submit(extr, NULL, retpad_visitor, NULL);
    if (err != SYMBLIB_OK) {
        fprintf(stderr, "Failed to submit end-of-ranges marker\n");
        symblib_retpadextr_free(extr);
        // Report failure the same way as every other error path instead of
        // leaking the raw library status code as the process exit code.
        return EXIT_FAILURE;
    }

    printf("\nRange extraction completed successfully.\n");

    symblib_retpadextr_free(extr);
    return EXIT_SUCCESS;
}
Loading

0 comments on commit 74bd1c1

Please sign in to comment.