|
1 | 1 | #include "nix/expr/eval-profiler.hh"
|
2 | 2 | #include "nix/expr/nixexpr.hh"
|
| 3 | +#include "nix/expr/eval.hh" |
| 4 | +#include "nix/util/lru-cache.hh" |
3 | 5 |
|
4 | 6 | namespace nix {
|
5 | 7 |
|
@@ -45,4 +47,273 @@ void MultiEvalProfiler::addProfiler(ref<EvalProfiler> profiler)
|
45 | 47 | invalidateNeededHooks();
|
46 | 48 | }
|
47 | 49 |
|
| 50 | +namespace { |
| 51 | + |
| 52 | +class PosCache : private LRUCache<PosIdx, Pos> |
| 53 | +{ |
| 54 | + const EvalState & state; |
| 55 | + |
| 56 | +public: |
| 57 | + PosCache(const EvalState & state) |
| 58 | + : LRUCache(524288) /* ~40MiB */ |
| 59 | + , state(state) |
| 60 | + { |
| 61 | + } |
| 62 | + |
| 63 | + Pos lookup(PosIdx posIdx) |
| 64 | + { |
| 65 | + auto posOrNone = LRUCache::get(posIdx); |
| 66 | + if (posOrNone) |
| 67 | + return *posOrNone; |
| 68 | + |
| 69 | + auto pos = state.positions[posIdx]; |
| 70 | + upsert(posIdx, pos); |
| 71 | + return pos; |
| 72 | + } |
| 73 | +}; |
| 74 | + |
| 75 | +struct LambdaFrameInfo |
| 76 | +{ |
| 77 | + ExprLambda * expr; |
| 78 | + /** Position where the lambda has been called. */ |
| 79 | + PosIdx callPos = noPos; |
| 80 | + std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const; |
| 81 | + auto operator<=>(const LambdaFrameInfo & rhs) const = default; |
| 82 | +}; |
| 83 | + |
| 84 | +/** Primop call. */ |
| 85 | +struct PrimOpFrameInfo |
| 86 | +{ |
| 87 | + const PrimOp * expr; |
| 88 | + /** Position where the primop has been called. */ |
| 89 | + PosIdx callPos = noPos; |
| 90 | + std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const; |
| 91 | + auto operator<=>(const PrimOpFrameInfo & rhs) const = default; |
| 92 | +}; |
| 93 | + |
| 94 | +/** Used for functor calls (attrset with __functor attr). */ |
| 95 | +struct FunctorFrameInfo |
| 96 | +{ |
| 97 | + PosIdx pos; |
| 98 | + std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const; |
| 99 | + auto operator<=>(const FunctorFrameInfo & rhs) const = default; |
| 100 | +}; |
| 101 | + |
| 102 | +/** Fallback frame info. */ |
| 103 | +struct GenericFrameInfo |
| 104 | +{ |
| 105 | + PosIdx pos; |
| 106 | + std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const; |
| 107 | + auto operator<=>(const GenericFrameInfo & rhs) const = default; |
| 108 | +}; |
| 109 | + |
| 110 | +using FrameInfo = std::variant<LambdaFrameInfo, PrimOpFrameInfo, FunctorFrameInfo, GenericFrameInfo>; |
| 111 | +using FrameStack = std::vector<FrameInfo>; |
| 112 | + |
| 113 | +/** |
| 114 | + * Stack sampling profiler which dumps the collected profile on destruction. |
| 115 | + * |
| 116 | + * @todo This should probably save the profile somewhere other than the |
| 117 | + * dtor, but its hard to consistently do that for all cases when EvalState |
| 118 | + * goes out of scope. |
| 119 | + */ |
| 120 | +class SampleStack : public EvalProfiler |
| 121 | +{ |
| 122 | + /* How often stack profiles should be flushed to file. This avoids the need |
| 123 | + to persist stack samples across the whole evaluation at the cost |
| 124 | + of periodically flushing data to disk. */ |
| 125 | + static constexpr std::chrono::microseconds profileDumpInterval = std::chrono::milliseconds(2000); |
| 126 | + |
| 127 | + Hooks getNeededHooksImpl() const override |
| 128 | + { |
| 129 | + return Hooks().set(preFunctionCall).set(postFunctionCall); |
| 130 | + } |
| 131 | + |
| 132 | +public: |
| 133 | + SampleStack(const EvalState & state, std::filesystem::path profileFile, std::chrono::microseconds period) |
| 134 | + : state(state) |
| 135 | + , sampleInterval(period) |
| 136 | + , profileFd([&]() { |
| 137 | + AutoCloseFD fd = toDescriptor(open(profileFile.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0660)); |
| 138 | + if (!fd) |
| 139 | + throw SysError("opening file '%s'", profileFile); |
| 140 | + return fd; |
| 141 | + }()) |
| 142 | + , posCache(state) |
| 143 | + { |
| 144 | + } |
| 145 | + |
| 146 | + [[gnu::noinline]] void |
| 147 | + preFunctionCallHook(const EvalState & state, const Value & v, std::span<Value *> args, const PosIdx pos) override; |
| 148 | + [[gnu::noinline]] void |
| 149 | + postFunctionCallHook(const EvalState & state, const Value & v, std::span<Value *> args, const PosIdx pos) override; |
| 150 | + |
| 151 | + void maybeSaveProfile(std::chrono::time_point<std::chrono::high_resolution_clock> now); |
| 152 | + void saveProfile(); |
| 153 | + FrameInfo getFrameInfoFromValueAndPos(const Value & v, PosIdx pos); |
| 154 | + |
| 155 | + SampleStack(SampleStack &&) = default; |
| 156 | + SampleStack & operator=(SampleStack &&) = delete; |
| 157 | + SampleStack(const SampleStack &) = delete; |
| 158 | + SampleStack & operator=(const SampleStack &) = delete; |
| 159 | + ~SampleStack(); |
| 160 | + |
| 161 | +private: |
| 162 | + /** Hold on to an instance of EvalState for symbolizing positions. */ |
| 163 | + const EvalState & state; |
| 164 | + std::chrono::microseconds sampleInterval; |
| 165 | + AutoCloseFD profileFd; |
| 166 | + FrameStack stack; |
| 167 | + std::map<FrameStack, uint32_t> callCount; |
| 168 | + std::chrono::time_point<std::chrono::high_resolution_clock> lastStackSample = |
| 169 | + std::chrono::high_resolution_clock::now(); |
| 170 | + std::chrono::time_point<std::chrono::high_resolution_clock> lastDump = std::chrono::high_resolution_clock::now(); |
| 171 | + PosCache posCache; |
| 172 | +}; |
| 173 | + |
| 174 | +FrameInfo SampleStack::getFrameInfoFromValueAndPos(const Value & v, PosIdx pos) |
| 175 | +{ |
| 176 | + /* NOTE: No actual references to garbage collected values are not held in |
| 177 | + the profiler. */ |
| 178 | + FrameInfo info; |
| 179 | + if (v.isLambda()) |
| 180 | + return LambdaFrameInfo{.expr = v.payload.lambda.fun, .callPos = pos}; |
| 181 | + else if (v.isPrimOp()) |
| 182 | + return PrimOpFrameInfo{.expr = v.primOp(), .callPos = pos}; |
| 183 | + else if (v.isPrimOpApp()) |
| 184 | + /* Resolve primOp eagerly. Must not hold on to a reference to a Value. */ |
| 185 | + return PrimOpFrameInfo{.expr = v.primOpAppPrimOp(), .callPos = pos}; |
| 186 | + else if (state.isFunctor(v)) { |
| 187 | + const auto functor = v.attrs()->get(state.sFunctor); |
| 188 | + if (auto pos_ = posCache.lookup(pos); std::holds_alternative<std::monostate>(pos_.origin)) |
| 189 | + /* HACK: In case callsite position is unresolved. */ |
| 190 | + return FunctorFrameInfo{.pos = functor->pos}; |
| 191 | + return FunctorFrameInfo{.pos = pos}; |
| 192 | + } else |
| 193 | + /* TODO: What to do with when trying to call an a non-function? This |
| 194 | + doesn't matter much, but the profiler should not crash evaluation, so |
| 195 | + add a frame regardless. This is what trace-function-calls does. */ |
| 196 | + return GenericFrameInfo{.pos = pos}; |
| 197 | +} |
| 198 | + |
| 199 | +[[gnu::noinline]] void SampleStack::preFunctionCallHook( |
| 200 | + const EvalState & state, const Value & v, [[maybe_unused]] std::span<Value *> args, const PosIdx pos) |
| 201 | +{ |
| 202 | + stack.push_back(getFrameInfoFromValueAndPos(v, pos)); |
| 203 | + auto now = std::chrono::high_resolution_clock::now(); |
| 204 | + /* Do this in preFunctionCallHook because we might throw an exception, but |
| 205 | + callFunction uses Finally, which doesn't play well with exceptions. */ |
| 206 | + maybeSaveProfile(now); |
| 207 | +} |
| 208 | + |
| 209 | +[[gnu::noinline]] void |
| 210 | +SampleStack::postFunctionCallHook(const EvalState & state, const Value & v, std::span<Value *> args, const PosIdx pos) |
| 211 | +{ |
| 212 | + auto now = std::chrono::high_resolution_clock::now(); |
| 213 | + |
| 214 | + if (now - lastStackSample > sampleInterval) { |
| 215 | + callCount[stack] += 1; |
| 216 | + lastStackSample = now; |
| 217 | + } |
| 218 | + |
| 219 | + if (!stack.empty()) |
| 220 | + stack.pop_back(); |
| 221 | +} |
| 222 | + |
| 223 | +std::ostream & LambdaFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const |
| 224 | +{ |
| 225 | + if (auto pos = posCache.lookup(callPos); std::holds_alternative<std::monostate>(pos.origin)) |
| 226 | + /* HACK: To avoid dubious «none»:0 in the generated profile if the origin can't be resolved |
| 227 | + resort to printing the lambda location instead of the callsite position. */ |
| 228 | + os << posCache.lookup(expr->getPos()); |
| 229 | + else |
| 230 | + os << pos; |
| 231 | + if (expr->name) |
| 232 | + os << ":" << state.symbols[expr->name]; |
| 233 | + return os; |
| 234 | +} |
| 235 | + |
| 236 | +std::ostream & GenericFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const |
| 237 | +{ |
| 238 | + os << posCache.lookup(pos); |
| 239 | + return os; |
| 240 | +} |
| 241 | + |
| 242 | +std::ostream & FunctorFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const |
| 243 | +{ |
| 244 | + os << posCache.lookup(pos) << ":functor"; |
| 245 | + return os; |
| 246 | +} |
| 247 | + |
| 248 | +std::ostream & PrimOpFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const |
| 249 | +{ |
| 250 | + /* Sometimes callsite position can have an unresolved origin, which |
| 251 | + leads to confusing «none»:0 locations in the profile. */ |
| 252 | + auto pos = posCache.lookup(callPos); |
| 253 | + if (!std::holds_alternative<std::monostate>(pos.origin)) |
| 254 | + os << posCache.lookup(callPos) << ":"; |
| 255 | + os << *expr; |
| 256 | + return os; |
| 257 | +} |
| 258 | + |
| 259 | +void SampleStack::maybeSaveProfile(std::chrono::time_point<std::chrono::high_resolution_clock> now) |
| 260 | +{ |
| 261 | + if (now - lastDump > profileDumpInterval) |
| 262 | + saveProfile(); |
| 263 | + else |
| 264 | + return; |
| 265 | + |
| 266 | + /* Save the last dump timepoint. Do this after actually saving data to file |
| 267 | + to not account for the time doing the flushing to disk. */ |
| 268 | + lastDump = std::chrono::system_clock::now(); |
| 269 | + |
| 270 | + /* Free up memory used for stack sampling. This might be very significant for |
| 271 | + long-running evaluations, so we shouldn't hog too much memory. */ |
| 272 | + callCount.clear(); |
| 273 | +} |
| 274 | + |
| 275 | +void SampleStack::saveProfile() |
| 276 | +{ |
| 277 | + auto os = std::ostringstream{}; |
| 278 | + for (auto & [stack, count] : callCount) { |
| 279 | + auto first = true; |
| 280 | + for (auto & pos : stack) { |
| 281 | + if (first) |
| 282 | + first = false; |
| 283 | + else |
| 284 | + os << ";"; |
| 285 | + |
| 286 | + std::visit([&](auto && info) { info.symbolize(state, os, posCache); }, pos); |
| 287 | + } |
| 288 | + os << " " << count; |
| 289 | + writeLine(profileFd.get(), std::move(os).str()); |
| 290 | + /* Clear ostringstream. */ |
| 291 | + os.str(""); |
| 292 | + os.clear(); |
| 293 | + } |
| 294 | +} |
| 295 | + |
| 296 | +SampleStack::~SampleStack() |
| 297 | +{ |
| 298 | + /* Guard against cases when we are already unwinding the stack. */ |
| 299 | + try { |
| 300 | + saveProfile(); |
| 301 | + } catch (std::exception & e) { |
| 302 | + /* Can't rethrow because ~EvalState is noexcept. */ |
| 303 | + try { |
| 304 | + warn("failed to save eval profile file: %1%", e.what()); |
| 305 | + } catch (...) { |
| 306 | + /* In case `warn` also throws give up. */ |
| 307 | + } |
| 308 | + } |
| 309 | +} |
| 310 | + |
| 311 | +} // namespace |
| 312 | + |
| 313 | +ref<EvalProfiler> |
| 314 | +makeSampleStackProfiler(const EvalState & state, std::filesystem::path profileFile, std::chrono::microseconds period) |
| 315 | +{ |
| 316 | + return make_ref<SampleStack>(state, profileFile, period); |
| 317 | +} |
| 318 | + |
48 | 319 | }
|
0 commit comments