|
1 | 1 | #include "nix/expr/eval-profiler.hh"
|
2 | 2 | #include "nix/expr/nixexpr.hh"
|
| 3 | +#include "nix/expr/eval.hh" |
| 4 | +#include "nix/util/lru-cache.hh" |
3 | 5 |
|
4 | 6 | namespace nix {
|
5 | 7 |
|
@@ -45,4 +47,267 @@ void MultiEvalProfiler::addProfiler(ref<EvalProfiler> profiler)
|
45 | 47 | invalidateNeededHooks();
|
46 | 48 | }
|
47 | 49 |
|
| 50 | +namespace { |
| 51 | + |
| 52 | +class PosCache : private LRUCache<PosIdx, Pos> |
| 53 | +{ |
| 54 | + const EvalState & state; |
| 55 | + |
| 56 | +public: |
| 57 | + PosCache(const EvalState & state) |
| 58 | + : LRUCache(524288) /* ~40MiB */ |
| 59 | + , state(state) |
| 60 | + { |
| 61 | + } |
| 62 | + |
| 63 | + Pos lookup(PosIdx posIdx) |
| 64 | + { |
| 65 | + auto posOrNone = LRUCache::get(posIdx); |
| 66 | + if (posOrNone) |
| 67 | + return *posOrNone; |
| 68 | + |
| 69 | + auto pos = state.positions[posIdx]; |
| 70 | + upsert(posIdx, pos); |
| 71 | + return pos; |
| 72 | + } |
| 73 | +}; |
| 74 | + |
| 75 | +struct LambdaFrameInfo |
| 76 | +{ |
| 77 | + ExprLambda * expr; |
| 78 | + /** Position where the lambda has been called. */ |
| 79 | + PosIdx callPos = noPos; |
| 80 | + std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const; |
| 81 | + auto operator<=>(const LambdaFrameInfo & rhs) const = default; |
| 82 | +}; |
| 83 | + |
| 84 | +/** Primop call. */ |
| 85 | +struct PrimOpFrameInfo |
| 86 | +{ |
| 87 | + const PrimOp * expr; |
| 88 | + /** Position where the primop has been called. */ |
| 89 | + PosIdx callPos = noPos; |
| 90 | + std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const; |
| 91 | + auto operator<=>(const PrimOpFrameInfo & rhs) const = default; |
| 92 | +}; |
| 93 | + |
| 94 | +/** Used for functor calls (attrset with __functor attr). */ |
| 95 | +struct FunctorFrameInfo |
| 96 | +{ |
| 97 | + PosIdx pos; |
| 98 | + std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const; |
| 99 | + auto operator<=>(const FunctorFrameInfo & rhs) const = default; |
| 100 | +}; |
| 101 | + |
| 102 | +/** Fallback frame info. */ |
| 103 | +struct GenericFrameInfo |
| 104 | +{ |
| 105 | + PosIdx pos; |
| 106 | + std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const; |
| 107 | + auto operator<=>(const GenericFrameInfo & rhs) const = default; |
| 108 | +}; |
| 109 | + |
| 110 | +using FrameInfo = std::variant<LambdaFrameInfo, PrimOpFrameInfo, FunctorFrameInfo, GenericFrameInfo>; |
| 111 | +using FrameStack = std::vector<FrameInfo>; |
| 112 | + |
| 113 | +/** |
| 114 | + * Stack sampling profiler. |
| 115 | + */ |
| 116 | +class SampleStack : public EvalProfiler |
| 117 | +{ |
| 118 | + /* How often stack profiles should be flushed to file. This avoids the need |
| 119 | + to persist stack samples across the whole evaluation at the cost |
| 120 | + of periodically flushing data to disk. */ |
| 121 | + static constexpr std::chrono::microseconds profileDumpInterval = std::chrono::milliseconds(2000); |
| 122 | + |
| 123 | + Hooks getNeededHooksImpl() const override |
| 124 | + { |
| 125 | + return Hooks().set(preFunctionCall).set(postFunctionCall); |
| 126 | + } |
| 127 | + |
| 128 | +public: |
| 129 | + SampleStack(const EvalState & state, std::filesystem::path profileFile, std::chrono::nanoseconds period) |
| 130 | + : state(state) |
| 131 | + , sampleInterval(period) |
| 132 | + , profileFd([&]() { |
| 133 | + AutoCloseFD fd = toDescriptor(open(profileFile.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0660)); |
| 134 | + if (!fd) |
| 135 | + throw SysError("opening file %s", profileFile); |
| 136 | + return fd; |
| 137 | + }()) |
| 138 | + , posCache(state) |
| 139 | + { |
| 140 | + } |
| 141 | + |
| 142 | + [[gnu::noinline]] void |
| 143 | + preFunctionCallHook(const EvalState & state, const Value & v, std::span<Value *> args, const PosIdx pos) override; |
| 144 | + [[gnu::noinline]] void |
| 145 | + postFunctionCallHook(const EvalState & state, const Value & v, std::span<Value *> args, const PosIdx pos) override; |
| 146 | + |
| 147 | + void maybeSaveProfile(std::chrono::time_point<std::chrono::high_resolution_clock> now); |
| 148 | + void saveProfile(); |
| 149 | + FrameInfo getFrameInfoFromValueAndPos(const Value & v, PosIdx pos); |
| 150 | + |
| 151 | + SampleStack(SampleStack &&) = default; |
| 152 | + SampleStack & operator=(SampleStack &&) = delete; |
| 153 | + SampleStack(const SampleStack &) = delete; |
| 154 | + SampleStack & operator=(const SampleStack &) = delete; |
| 155 | + ~SampleStack(); |
| 156 | + |
| 157 | +private: |
| 158 | + /** Hold on to an instance of EvalState for symbolizing positions. */ |
| 159 | + const EvalState & state; |
| 160 | + std::chrono::nanoseconds sampleInterval; |
| 161 | + AutoCloseFD profileFd; |
| 162 | + FrameStack stack; |
| 163 | + std::map<FrameStack, uint32_t> callCount; |
| 164 | + std::chrono::time_point<std::chrono::high_resolution_clock> lastStackSample = |
| 165 | + std::chrono::high_resolution_clock::now(); |
| 166 | + std::chrono::time_point<std::chrono::high_resolution_clock> lastDump = std::chrono::high_resolution_clock::now(); |
| 167 | + PosCache posCache; |
| 168 | +}; |
| 169 | + |
| 170 | +FrameInfo SampleStack::getFrameInfoFromValueAndPos(const Value & v, PosIdx pos) |
| 171 | +{ |
| 172 | + /* NOTE: No actual references to garbage collected values are not held in |
| 173 | + the profiler. */ |
| 174 | + if (v.isLambda()) |
| 175 | + return LambdaFrameInfo{.expr = v.payload.lambda.fun, .callPos = pos}; |
| 176 | + else if (v.isPrimOp()) |
| 177 | + return PrimOpFrameInfo{.expr = v.primOp(), .callPos = pos}; |
| 178 | + else if (v.isPrimOpApp()) |
| 179 | + /* Resolve primOp eagerly. Must not hold on to a reference to a Value. */ |
| 180 | + return PrimOpFrameInfo{.expr = v.primOpAppPrimOp(), .callPos = pos}; |
| 181 | + else if (state.isFunctor(v)) { |
| 182 | + const auto functor = v.attrs()->get(state.sFunctor); |
| 183 | + if (auto pos_ = posCache.lookup(pos); std::holds_alternative<std::monostate>(pos_.origin)) |
| 184 | + /* HACK: In case callsite position is unresolved. */ |
| 185 | + return FunctorFrameInfo{.pos = functor->pos}; |
| 186 | + return FunctorFrameInfo{.pos = pos}; |
| 187 | + } else |
| 188 | + /* NOTE: Add a stack frame even for invalid cases (e.g. when calling a non-function). This is what |
| 189 | + * trace-function-calls does. */ |
| 190 | + return GenericFrameInfo{.pos = pos}; |
| 191 | +} |
| 192 | + |
| 193 | +[[gnu::noinline]] void SampleStack::preFunctionCallHook( |
| 194 | + const EvalState & state, const Value & v, [[maybe_unused]] std::span<Value *> args, const PosIdx pos) |
| 195 | +{ |
| 196 | + stack.push_back(getFrameInfoFromValueAndPos(v, pos)); |
| 197 | + |
| 198 | + auto now = std::chrono::high_resolution_clock::now(); |
| 199 | + |
| 200 | + if (now - lastStackSample > sampleInterval) { |
| 201 | + callCount[stack] += 1; |
| 202 | + lastStackSample = now; |
| 203 | + } |
| 204 | + |
| 205 | + /* Do this in preFunctionCallHook because we might throw an exception, but |
| 206 | + callFunction uses Finally, which doesn't play well with exceptions. */ |
| 207 | + maybeSaveProfile(now); |
| 208 | +} |
| 209 | + |
| 210 | +[[gnu::noinline]] void |
| 211 | +SampleStack::postFunctionCallHook(const EvalState & state, const Value & v, std::span<Value *> args, const PosIdx pos) |
| 212 | +{ |
| 213 | + |
| 214 | + if (!stack.empty()) |
| 215 | + stack.pop_back(); |
| 216 | +} |
| 217 | + |
| 218 | +std::ostream & LambdaFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const |
| 219 | +{ |
| 220 | + if (auto pos = posCache.lookup(callPos); std::holds_alternative<std::monostate>(pos.origin)) |
| 221 | + /* HACK: To avoid dubious «none»:0 in the generated profile if the origin can't be resolved |
| 222 | + resort to printing the lambda location instead of the callsite position. */ |
| 223 | + os << posCache.lookup(expr->getPos()); |
| 224 | + else |
| 225 | + os << pos; |
| 226 | + if (expr->name) |
| 227 | + os << ":" << state.symbols[expr->name]; |
| 228 | + return os; |
| 229 | +} |
| 230 | + |
| 231 | +std::ostream & GenericFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const |
| 232 | +{ |
| 233 | + os << posCache.lookup(pos); |
| 234 | + return os; |
| 235 | +} |
| 236 | + |
| 237 | +std::ostream & FunctorFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const |
| 238 | +{ |
| 239 | + os << posCache.lookup(pos) << ":functor"; |
| 240 | + return os; |
| 241 | +} |
| 242 | + |
| 243 | +std::ostream & PrimOpFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const |
| 244 | +{ |
| 245 | + /* Sometimes callsite position can have an unresolved origin, which |
| 246 | + leads to confusing «none»:0 locations in the profile. */ |
| 247 | + auto pos = posCache.lookup(callPos); |
| 248 | + if (!std::holds_alternative<std::monostate>(pos.origin)) |
| 249 | + os << posCache.lookup(callPos) << ":"; |
| 250 | + os << *expr; |
| 251 | + return os; |
| 252 | +} |
| 253 | + |
| 254 | +void SampleStack::maybeSaveProfile(std::chrono::time_point<std::chrono::high_resolution_clock> now) |
| 255 | +{ |
| 256 | + if (now - lastDump >= profileDumpInterval) |
| 257 | + saveProfile(); |
| 258 | + else |
| 259 | + return; |
| 260 | + |
| 261 | + /* Save the last dump timepoint. Do this after actually saving data to file |
| 262 | + to not account for the time doing the flushing to disk. */ |
| 263 | + lastDump = std::chrono::high_resolution_clock::now(); |
| 264 | + |
| 265 | + /* Free up memory used for stack sampling. This might be very significant for |
| 266 | + long-running evaluations, so we shouldn't hog too much memory. */ |
| 267 | + callCount.clear(); |
| 268 | +} |
| 269 | + |
| 270 | +void SampleStack::saveProfile() |
| 271 | +{ |
| 272 | + auto os = std::ostringstream{}; |
| 273 | + for (auto & [stack, count] : callCount) { |
| 274 | + auto first = true; |
| 275 | + for (auto & pos : stack) { |
| 276 | + if (first) |
| 277 | + first = false; |
| 278 | + else |
| 279 | + os << ";"; |
| 280 | + |
| 281 | + std::visit([&](auto && info) { info.symbolize(state, os, posCache); }, pos); |
| 282 | + } |
| 283 | + os << " " << count; |
| 284 | + writeLine(profileFd.get(), std::move(os).str()); |
| 285 | + /* Clear ostringstream. */ |
| 286 | + os.str(""); |
| 287 | + os.clear(); |
| 288 | + } |
| 289 | +} |
| 290 | + |
| 291 | +SampleStack::~SampleStack() |
| 292 | +{ |
| 293 | + /* Guard against cases when we are already unwinding the stack. */ |
| 294 | + try { |
| 295 | + saveProfile(); |
| 296 | + } catch (...) { |
| 297 | + ignoreExceptionInDestructor(); |
| 298 | + } |
| 299 | +} |
| 300 | + |
| 301 | +} // namespace |
| 302 | + |
| 303 | +ref<EvalProfiler> |
| 304 | +makeSampleStackProfiler(const EvalState & state, std::filesystem::path profileFile, uint64_t frequency) |
| 305 | +{ |
| 306 | + /* 0 is a special value for sampling stack after each call. */ |
| 307 | + std::chrono::nanoseconds period = frequency == 0 |
| 308 | + ? std::chrono::nanoseconds{0} |
| 309 | + : std::chrono::nanoseconds{std::nano::den / frequency / std::nano::num}; |
| 310 | + return make_ref<SampleStack>(state, profileFile, period); |
| 311 | +} |
| 312 | + |
48 | 313 | }
|
0 commit comments