Skip to content

Commit 113be6c

Browse files
xokdviumMic92
andcommitted
libexpr: Add SampleStack stack-sampling profiler
This patch adds support for a native stack sampling profiler to the evaluator, which saves a collapsed stack profile information to a configurable location. Introduced options (in `EvalSettings`): - `eval-profile-file` - path to the collected profile file. - `eval-profiler-sample-period` - sampling frequency. - `eval-profiler` - enumeration option for enabling the profiler. Currently only `flamegraph` is supported, but having this an enumeration rather than a boolean switch leaves the door open for other profiler variants (e.g. tracy). Profile includes the following information on best-effort basis (e.g. some lambdas might have an undefined name). Callstack information contains: - Call site location (where the function gets called). - Primop/lambda name of the function being called. - Functors/partial applications don't have a name attached to them unlike special-cased primops and lambads. For cases where callsite location isn't available we have to resort to providing the location where the lambda itself is defined. This removes some of the confusing `«none»:0` locations in the profile from #11373. Example usage with piping directly into zstd for compression: ``` nix eval --no-eval-cache nixpkgs#nixosTests.gnome \ --option eval-profiler flamegraph \ --option eval-profile-file >(zstd -of nix.profile.zstd) ``` This builds upon #11373. Co-authored-by: Jörg Thalheim <[email protected]>
1 parent e7078d4 commit 113be6c

9 files changed

+374
-6
lines changed

src/libexpr/eval-profiler-settings.cc

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#include "nix/expr/eval-profiler-settings.hh"
2+
#include "nix/util/configuration.hh"
3+
#include "nix/util/logging.hh" /* Needs to be included before config-impl.hh */
4+
#include "nix/util/config-impl.hh"
5+
#include "nix/util/abstract-setting-to-json.hh"
6+
7+
#include <nlohmann/json.hpp>
8+
9+
namespace nix {
10+
11+
template<>
12+
EvalProfilerMode BaseSetting<EvalProfilerMode>::parse(const std::string & str) const
13+
{
14+
if (str == "disabled")
15+
return epmDisabled;
16+
else if (str == "flamegraph")
17+
return epmFlamegraph;
18+
else
19+
throw UsageError("option '%s' has invalid value '%s'", name, str);
20+
}
21+
22+
template<>
23+
struct BaseSetting<EvalProfilerMode>::trait
24+
{
25+
static constexpr bool appendable = false;
26+
};
27+
28+
template<>
29+
std::string BaseSetting<EvalProfilerMode>::to_string() const
30+
{
31+
if (value == epmDisabled)
32+
return "disabled";
33+
else if (value == epmFlamegraph)
34+
return "flamegraph";
35+
else
36+
unreachable();
37+
}
38+
39+
NLOHMANN_JSON_SERIALIZE_ENUM(
40+
EvalProfilerMode,
41+
{
42+
{EvalProfilerMode::epmDisabled, "disabled"},
43+
{EvalProfilerMode::epmFlamegraph, "flamegraph"},
44+
});
45+
46+
/* Explicit instantian of templates */
47+
template class BaseSetting<EvalProfilerMode>;
48+
49+
}

src/libexpr/eval-profiler.cc

Lines changed: 271 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#include "nix/expr/eval-profiler.hh"
22
#include "nix/expr/nixexpr.hh"
3+
#include "nix/expr/eval.hh"
4+
#include "nix/util/lru-cache.hh"
35

46
namespace nix {
57

@@ -45,4 +47,273 @@ void MultiEvalProfiler::addProfiler(ref<EvalProfiler> profiler)
4547
invalidateNeededHooks();
4648
}
4749

50+
namespace {
51+
52+
class PosCache : private LRUCache<PosIdx, Pos>
53+
{
54+
const EvalState & state;
55+
56+
public:
57+
PosCache(const EvalState & state)
58+
: LRUCache(524288) /* ~40MiB */
59+
, state(state)
60+
{
61+
}
62+
63+
Pos lookup(PosIdx posIdx)
64+
{
65+
auto posOrNone = LRUCache::get(posIdx);
66+
if (posOrNone)
67+
return *posOrNone;
68+
69+
auto pos = state.positions[posIdx];
70+
upsert(posIdx, pos);
71+
return pos;
72+
}
73+
};
74+
75+
struct LambdaFrameInfo
76+
{
77+
ExprLambda * expr;
78+
/** Position where the lambda has been called. */
79+
PosIdx callPos = noPos;
80+
std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const;
81+
auto operator<=>(const LambdaFrameInfo & rhs) const = default;
82+
};
83+
84+
/** Primop call. */
85+
struct PrimOpFrameInfo
86+
{
87+
const PrimOp * expr;
88+
/** Position where the primop has been called. */
89+
PosIdx callPos = noPos;
90+
std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const;
91+
auto operator<=>(const PrimOpFrameInfo & rhs) const = default;
92+
};
93+
94+
/** Used for functor calls (attrset with __functor attr). */
95+
struct FunctorFrameInfo
96+
{
97+
PosIdx pos;
98+
std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const;
99+
auto operator<=>(const FunctorFrameInfo & rhs) const = default;
100+
};
101+
102+
/** Fallback frame info. */
103+
struct GenericFrameInfo
104+
{
105+
PosIdx pos;
106+
std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const;
107+
auto operator<=>(const GenericFrameInfo & rhs) const = default;
108+
};
109+
110+
using FrameInfo = std::variant<LambdaFrameInfo, PrimOpFrameInfo, FunctorFrameInfo, GenericFrameInfo>;
111+
using FrameStack = std::vector<FrameInfo>;
112+
113+
/**
114+
* Stack sampling profiler which dumps the collected profile on destruction.
115+
*
116+
* @todo This should probably save the profile somewhere other than the
117+
* dtor, but its hard to consistently do that for all cases when EvalState
118+
* goes out of scope.
119+
*/
120+
class SampleStack : public EvalProfiler
121+
{
122+
/* How often stack profiles should be flushed to file. This avoids the need
123+
to persist stack samples across the whole evaluation at the cost
124+
of periodically flushing data to disk. */
125+
static constexpr std::chrono::microseconds profileDumpInterval = std::chrono::milliseconds(2000);
126+
127+
Hooks getNeededHooksImpl() const override
128+
{
129+
return Hooks().set(preFunctionCall).set(postFunctionCall);
130+
}
131+
132+
public:
133+
SampleStack(const EvalState & state, std::filesystem::path profileFile, std::chrono::microseconds period)
134+
: state(state)
135+
, sampleInterval(period)
136+
, profileFd([&]() {
137+
AutoCloseFD fd = toDescriptor(open(profileFile.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0660));
138+
if (!fd)
139+
throw SysError("opening file '%s'", profileFile);
140+
return fd;
141+
}())
142+
, posCache(state)
143+
{
144+
}
145+
146+
[[gnu::noinline]] void
147+
preFunctionCallHook(const EvalState & state, const Value & v, std::span<Value *> args, const PosIdx pos) override;
148+
[[gnu::noinline]] void
149+
postFunctionCallHook(const EvalState & state, const Value & v, std::span<Value *> args, const PosIdx pos) override;
150+
151+
void maybeSaveProfile(std::chrono::time_point<std::chrono::high_resolution_clock> now);
152+
void saveProfile();
153+
FrameInfo getFrameInfoFromValueAndPos(const Value & v, PosIdx pos);
154+
155+
SampleStack(SampleStack &&) = default;
156+
SampleStack & operator=(SampleStack &&) = delete;
157+
SampleStack(const SampleStack &) = delete;
158+
SampleStack & operator=(const SampleStack &) = delete;
159+
~SampleStack();
160+
161+
private:
162+
/** Hold on to an instance of EvalState for symbolizing positions. */
163+
const EvalState & state;
164+
std::chrono::microseconds sampleInterval;
165+
AutoCloseFD profileFd;
166+
FrameStack stack;
167+
std::map<FrameStack, uint32_t> callCount;
168+
std::chrono::time_point<std::chrono::high_resolution_clock> lastStackSample =
169+
std::chrono::high_resolution_clock::now();
170+
std::chrono::time_point<std::chrono::high_resolution_clock> lastDump = std::chrono::high_resolution_clock::now();
171+
PosCache posCache;
172+
};
173+
174+
FrameInfo SampleStack::getFrameInfoFromValueAndPos(const Value & v, PosIdx pos)
175+
{
176+
/* NOTE: No actual references to garbage collected values are not held in
177+
the profiler. */
178+
FrameInfo info;
179+
if (v.isLambda())
180+
return LambdaFrameInfo{.expr = v.payload.lambda.fun, .callPos = pos};
181+
else if (v.isPrimOp())
182+
return PrimOpFrameInfo{.expr = v.primOp(), .callPos = pos};
183+
else if (v.isPrimOpApp())
184+
/* Resolve primOp eagerly. Must not hold on to a reference to a Value. */
185+
return PrimOpFrameInfo{.expr = v.primOpAppPrimOp(), .callPos = pos};
186+
else if (state.isFunctor(v)) {
187+
const auto functor = v.attrs()->get(state.sFunctor);
188+
if (auto pos_ = posCache.lookup(pos); std::holds_alternative<std::monostate>(pos_.origin))
189+
/* HACK: In case callsite position is unresolved. */
190+
return FunctorFrameInfo{.pos = functor->pos};
191+
return FunctorFrameInfo{.pos = pos};
192+
} else
193+
/* TODO: What to do with when trying to call an a non-function? This
194+
doesn't matter much, but the profiler should not crash evaluation, so
195+
add a frame regardless. This is what trace-function-calls does. */
196+
return GenericFrameInfo{.pos = pos};
197+
}
198+
199+
[[gnu::noinline]] void SampleStack::preFunctionCallHook(
200+
const EvalState & state, const Value & v, [[maybe_unused]] std::span<Value *> args, const PosIdx pos)
201+
{
202+
stack.push_back(getFrameInfoFromValueAndPos(v, pos));
203+
auto now = std::chrono::high_resolution_clock::now();
204+
/* Do this in preFunctionCallHook because we might throw an exception, but
205+
callFunction uses Finally, which doesn't play well with exceptions. */
206+
maybeSaveProfile(now);
207+
}
208+
209+
[[gnu::noinline]] void
210+
SampleStack::postFunctionCallHook(const EvalState & state, const Value & v, std::span<Value *> args, const PosIdx pos)
211+
{
212+
auto now = std::chrono::high_resolution_clock::now();
213+
214+
if (now - lastStackSample > sampleInterval) {
215+
callCount[stack] += 1;
216+
lastStackSample = now;
217+
}
218+
219+
if (!stack.empty())
220+
stack.pop_back();
221+
}
222+
223+
std::ostream & LambdaFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const
224+
{
225+
if (auto pos = posCache.lookup(callPos); std::holds_alternative<std::monostate>(pos.origin))
226+
/* HACK: To avoid dubious «none»:0 in the generated profile if the origin can't be resolved
227+
resort to printing the lambda location instead of the callsite position. */
228+
os << posCache.lookup(expr->getPos());
229+
else
230+
os << pos;
231+
if (expr->name)
232+
os << ":" << state.symbols[expr->name];
233+
return os;
234+
}
235+
236+
std::ostream & GenericFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const
237+
{
238+
os << posCache.lookup(pos);
239+
return os;
240+
}
241+
242+
std::ostream & FunctorFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const
243+
{
244+
os << posCache.lookup(pos) << ":functor";
245+
return os;
246+
}
247+
248+
std::ostream & PrimOpFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const
249+
{
250+
/* Sometimes callsite position can have an unresolved origin, which
251+
leads to confusing «none»:0 locations in the profile. */
252+
auto pos = posCache.lookup(callPos);
253+
if (!std::holds_alternative<std::monostate>(pos.origin))
254+
os << posCache.lookup(callPos) << ":";
255+
os << *expr;
256+
return os;
257+
}
258+
259+
void SampleStack::maybeSaveProfile(std::chrono::time_point<std::chrono::high_resolution_clock> now)
260+
{
261+
if (now - lastDump > profileDumpInterval)
262+
saveProfile();
263+
else
264+
return;
265+
266+
/* Save the last dump timepoint. Do this after actually saving data to file
267+
to not account for the time doing the flushing to disk. */
268+
lastDump = std::chrono::system_clock::now();
269+
270+
/* Free up memory used for stack sampling. This might be very significant for
271+
long-running evaluations, so we shouldn't hog too much memory. */
272+
callCount.clear();
273+
}
274+
275+
void SampleStack::saveProfile()
276+
{
277+
auto os = std::ostringstream{};
278+
for (auto & [stack, count] : callCount) {
279+
auto first = true;
280+
for (auto & pos : stack) {
281+
if (first)
282+
first = false;
283+
else
284+
os << ";";
285+
286+
std::visit([&](auto && info) { info.symbolize(state, os, posCache); }, pos);
287+
}
288+
os << " " << count;
289+
writeLine(profileFd.get(), std::move(os).str());
290+
/* Clear ostringstream. */
291+
os.str("");
292+
os.clear();
293+
}
294+
}
295+
296+
SampleStack::~SampleStack()
297+
{
298+
/* Guard against cases when we are already unwinding the stack. */
299+
try {
300+
saveProfile();
301+
} catch (std::exception & e) {
302+
/* Can't rethrow because ~EvalState is noexcept. */
303+
try {
304+
warn("failed to save eval profile file: %1%", e.what());
305+
} catch (...) {
306+
/* In case `warn` also throws give up. */
307+
}
308+
}
309+
}
310+
311+
} // namespace
312+
313+
ref<EvalProfiler>
314+
makeSampleStackProfiler(const EvalState & state, std::filesystem::path profileFile, std::chrono::microseconds period)
315+
{
316+
return make_ref<SampleStack>(state, profileFile, period);
317+
}
318+
48319
}

src/libexpr/eval.cc

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -376,13 +376,18 @@ EvalState::EvalState(
376376
/* Register function call tracer. */
377377
if (settings.traceFunctionCalls)
378378
profiler.addProfiler(make_ref<FunctionCallTrace>());
379-
}
380-
381379

382-
EvalState::~EvalState()
383-
{
380+
switch (settings.evalProfilerMode) {
381+
case epmFlamegraph:
382+
profiler.addProfiler(makeSampleStackProfiler(
383+
*this, settings.evalProfileFile.get(), std::chrono::microseconds(settings.evalProfilerPeriod.get())));
384+
break;
385+
case epmDisabled:
386+
break;
387+
}
384388
}
385389

390+
EvalState::~EvalState() { }
386391

387392
void EvalState::allowPath(const Path & path)
388393
{
@@ -2236,7 +2241,7 @@ bool EvalState::forceBool(Value & v, const PosIdx pos, std::string_view errorCtx
22362241
}
22372242

22382243

2239-
bool EvalState::isFunctor(Value & fun)
2244+
bool EvalState::isFunctor(const Value & fun) const
22402245
{
22412246
return fun.type() == nAttrs && fun.attrs()->find(sFunctor) != fun.attrs()->end();
22422247
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#pragma once
2+
///@file
3+
4+
#include "nix/util/configuration.hh"
5+
6+
namespace nix {
7+
8+
typedef enum { epmDisabled, epmFlamegraph } EvalProfilerMode;
9+
10+
template<>
11+
EvalProfilerMode BaseSetting<EvalProfilerMode>::parse(const std::string & str) const;
12+
13+
template<>
14+
std::string BaseSetting<EvalProfilerMode>::to_string() const;
15+
16+
}

src/libexpr/include/nix/expr/eval-profiler.hh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <span>
1212
#include <bitset>
1313
#include <optional>
14+
#include <filesystem>
1415

1516
namespace nix {
1617

@@ -110,4 +111,7 @@ public:
110111
postFunctionCallHook(const EvalState & state, const Value & v, std::span<Value *> args, const PosIdx pos) override;
111112
};
112113

114+
ref<EvalProfiler>
115+
makeSampleStackProfiler(const EvalState & state, std::filesystem::path profileFile, std::chrono::microseconds period);
116+
113117
}

0 commit comments

Comments
 (0)