Review revision of the "measure GC cycles" patch.

Changes relative to the patch as submitted:
  * run_benchmarks.jl: add the "gc cycles" column to the table header; the
    data matrix gained a column but the header did not.
  * utils.jl: look for --cycles-count anywhere in ARGS instead of reading
    ARGS[2], which fails when fewer than two arguments are given.
  * measure_gc_cycles.c: restore the #include targets, close the previous
    perf descriptor on repeated starts, ignore failed counter reads.

Line breaks and indentation were reconstructed from a whitespace-collapsed
copy, and the post-image blob ids on the index lines are carried over rather
than recomputed.

diff --git a/build.sh b/build.sh
new file mode 100644
index 0000000..522dcc5
--- /dev/null
+++ b/build.sh
@@ -0,0 +1 @@
+gcc -Wall -shared -fPIC -o gc_benchmarks.so measure_gc_cycles.c
diff --git a/measure_gc_cycles.c b/measure_gc_cycles.c
new file mode 100644
index 0000000..8c5114c
--- /dev/null
+++ b/measure_gc_cycles.c
@@ -0,0 +1,78 @@
+// Based on `https://man7.org/linux/man-pages/man2/perf_event_open.2.html`
+//
+// Counts user-space CPU cycles between perf_event_reset() and
+// perf_event_count(). utils.jl registers those two functions as Julia's
+// pre-GC and post-GC callbacks, so total_count accumulates the cycles
+// counted while the collector runs.
+
+// NOTE(review): the header names were missing from the patch as submitted;
+// these are the seven headers used by the man-page example -- confirm.
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/perf_event.h>
+#include <asm/unistd.h>
+
+long long total_count = 0;
+// Perf event descriptor; -1 until perf_event_start() has opened one.
+int fd = -1;
+
+void perf_event_reset();
+
+// Open a cycle counter (user space only, initially disabled), then reset
+// and enable it. Exits the process if the counter cannot be opened.
+void perf_event_start()
+{
+    struct perf_event_attr pe;
+
+    // Called again (once per @gctime use): release the previous descriptor
+    // instead of leaking it.
+    if (fd != -1) {
+        close(fd);
+        fd = -1;
+    }
+
+    memset(&pe, 0, sizeof(pe));
+    pe.type = PERF_TYPE_HARDWARE;
+    pe.size = sizeof(pe);
+    pe.config = PERF_COUNT_HW_CPU_CYCLES;
+    pe.disabled = 1;
+    pe.exclude_kernel = 1;
+    pe.exclude_hv = 1;
+
+    // pid = 0, cpu = -1, group_fd = -1, flags = 0
+    fd = syscall(__NR_perf_event_open, &pe, 0, -1, -1, 0);
+    if (fd == -1) {
+        fprintf(stderr, "Error opening perf event\n");
+        exit(1);
+    }
+
+    perf_event_reset();
+}
+
+// Zero the counter and start counting.
+void perf_event_reset()
+{
+    ioctl(fd, PERF_EVENT_IOC_RESET, 0);
+    ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
+}
+
+// Stop counting and add the counter value to total_count.
+void perf_event_count()
+{
+    long long count = 0;
+
+    ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
+    // Accumulate only a complete read; the original added `count` even when
+    // read() failed and left it uninitialized.
+    if (read(fd, &count, sizeof(count)) == (ssize_t)sizeof(count))
+        total_count += count;
+}
+
+// Return the cycles accumulated by all perf_event_count() calls so far.
+long long perf_event_get_count()
+{
+    return total_count;
+}
diff --git a/run_benchmarks.jl b/run_benchmarks.jl
index 0593714..860f0e6 100644
--- a/run_benchmarks.jl
+++ b/run_benchmarks.jl
@@ -45,10 +45,11 @@ function run_bench(runs, threads, file, show_json = false)
     times = []
     gc_diff = []
     gc_end = []
+    gc_cycles = []
     for _ in 1:runs
         # uglyness to communicate over non stdout (specifically file descriptor 3)
         p = Base.PipeEndpoint()
-        cmd = `$JULIAVER --project=. --threads=$threads $file SERIALIZE`
+        cmd = `$JULIAVER --project=. --threads=$threads $file SERIALIZE --no-cycles-count`
         cmd = run(Base.CmdRedirect(cmd, p, 3), stdin, stdout, stderr, wait=false)
         r = deserialize(p)
         @assert success(cmd)
@@ -57,16 +58,26 @@ function run_bench(runs, threads, file, show_json = false)
         push!(times, r.times)
         push!(gc_diff, r.gc_diff)
         push!(gc_end, r.gc_end)
+        # run once more to measure cycles
+        p = Base.PipeEndpoint()
+        cmd = `$JULIAVER --project=. --threads=$threads $file SERIALIZE --cycles-count`
+        cmd = run(Base.CmdRedirect(cmd, p, 3), stdin, stdout, stderr, wait=false)
+        r = deserialize(p)
+        @assert success(cmd)
+        # end uglyness
+        push!(gc_cycles, r.gc_cycles)
     end
     total_stats = get_stats(times) ./ 1_000_000
     gc_time = get_stats(map(stat->stat.total_time, gc_end)) ./ 1_000_000
+    gc_cycles = get_stats(gc_cycles) ./ 1_000_000
     mark_time = get_stats(map(stat->stat.total_mark_time, gc_end)) ./ 1_000_000
     sweep_time = get_stats(map(stat->stat.total_sweep_time, gc_end)) ./ 1_000_000
     max_pause = get_stats(map(stat->stat.max_pause, gc_end)) ./ 1_000_000
     time_to_safepoint = get_stats(map(stat->stat.time_to_safepoint, gc_end)) ./ 1_000
     max_mem = get_stats(map(stat->stat.max_memory, gc_end)) ./ 1024^2
     pct_gc = get_stats(map((t,stat)->(stat.total_time/t), times, gc_diff)) .* 100
-    
+
-    header = (["", "total time", "gc time", "mark time", "sweep time", "max GC pause", "time to safepoint", "max heap", "percent gc"],
-              ["", "ms", "ms", "ms", "ms", "ms", "us", "MB", "%" ])
+    # One name/unit entry per column of `data` below; gc cycles are scaled to millions above.
+    header = (["", "total time", "gc time", "gc cycles", "mark time", "sweep time", "max GC pause", "time to safepoint", "max heap", "percent gc"],
+              ["", "ms", "ms", "Mcycles", "ms", "ms", "ms", "us", "MB", "%" ])
     labels = ["minimum", "median", "maximum", "stdev"]
@@ -85,7 +96,7 @@ function run_bench(runs, threads, file, show_json = false)
                      ("pct gc", pct_gc)])
         JSON.print(data)
     else
-        data = hcat(labels, total_stats, gc_time, mark_time, sweep_time, max_pause, time_to_safepoint, max_mem, pct_gc)
+        data = hcat(labels, total_stats, gc_time, gc_cycles, mark_time, sweep_time, max_pause, time_to_safepoint, max_mem, pct_gc)
         pretty_table(data; header, formatters=ft_printf("%0.0f"), highlighters)
     end
 end
diff --git a/utils.jl b/utils.jl
index 93a32cb..702c729 100644
--- a/utils.jl
+++ b/utils.jl
@@ -1,7 +1,21 @@
 using Pkg
 Pkg.instantiate() # It is dumb that I have to do this
+using Libdl
 using Serialization
 
+# Shared library produced by build.sh from measure_gc_cycles.c.
+# NOTE(review): relative path -- presumably resolved from the benchmark's working directory; confirm.
+const GC_LIB = "../../../gc_benchmarks.so"
+const lib = Libdl.dlopen(GC_LIB)
+const sym_start = Libdl.dlsym(lib, :perf_event_start)
+const sym_reset = Libdl.dlsym(lib, :perf_event_reset)
+const sym_count = Libdl.dlsym(lib, :perf_event_count)
+const sym_get_count = Libdl.dlsym(lib, :perf_event_get_count)
+
+# Look the flag up anywhere in ARGS: indexing ARGS[2] threw a BoundsError
+# whenever a benchmark was started with fewer than two arguments.
+const count_cycles = "--cycles-count" in ARGS
+
 macro gctime(ex)
     fc = isdefined(Base.Experimental, Symbol("@force_compile")) ?
         :(Base.Experimental.@force_compile) :
@@ -10,6 +24,15 @@ macro gctime(ex)
         $fc
         local result
         try
+            # Only the --cycles-count run opens the counter; it is reset before
+            # each collection and accumulated after it via the GC callbacks.
+            if count_cycles
+                ccall(sym_start, Cvoid, ())
+                ccall(:jl_gc_set_cb_pre_gc, Cvoid, (Ptr{Cvoid}, Cint),
+                    sym_reset, true)
+                ccall(:jl_gc_set_cb_post_gc, Cvoid, (Ptr{Cvoid}, Cint),
+                    sym_count, true)
+            end
             local start_gc_num = Base.gc_num()
             local start_time = time_ns()
             local val = $(esc(ex))
@@ -19,7 +42,8 @@ macro gctime(ex)
                 value = val,
                 times = (end_time - start_time),
                 gc_diff = Base.GC_Diff(end_gc_num, start_gc_num),
-                gc_end = end_gc_num
+                gc_end = end_gc_num,
+                gc_cycles = ccall(sym_get_count, Clonglong, ()),
             )
         catch e
             @show e
@@ -27,7 +51,8 @@ macro gctime(ex)
                 value = e,
                 times = NaN,
                 gc_diff = Base.GC_Diff(end_gc_num, start_gc_num),
-                gc_end = end_gc_num
+                gc_end = end_gc_num,
+                gc_cycles = NaN,
             )
         end
         if "SERIALIZE" in ARGS