"""Plot Parthenon tracer output.

Each input file is expected to be named ``trace_<rank>.txt`` and to consist of
``Region: <label>`` header lines, each followed by one ``<start> <stop>`` line
(seconds) per recorded sample of that region.  Every rank is drawn as one row
of horizontal bars, one bar per sample.
"""

import sys
import re
import os
import itertools
from collections import OrderedDict
from argparse import ArgumentParser

parser = ArgumentParser(prog="plot_trace", description="Plot parthenon tracer output")

parser.add_argument(
    "--start",
    dest="step_start",
    type=int,
    default=-1,
    help="First step to include",
)

parser.add_argument(
    "--stop",
    dest="step_stop",
    type=int,
    default=-1,
    help="Final step to include (inclusive)",
)

parser.add_argument("files", type=str, nargs="+", help="trace files to plot")


class Region:
    """Start times and durations (seconds) of all samples of one traced region."""

    def __init__(self):
        self.start = []
        self.duration = []

    def add_samples(self, line):
        """Append one sample parsed from a ``"<start> <stop>"`` text line."""
        words = line.split()
        start = float(words[0])
        stop = float(words[1])
        self.start.append(start)
        self.duration.append(stop - start)

    def trim(self, tstart, tstop):
        """Keep only samples that start after ``tstart`` and end by ``tstop``.

        Implemented as a filter rather than an index slice so the result does
        not depend on the samples being sorted by start time.
        """
        kept = [
            (s, d)
            for s, d in zip(self.start, self.duration)
            if s > tstart and s + d <= tstop
        ]
        self.start = [s for s, _ in kept]
        self.duration = [d for _, d in kept]


class Trace:
    """All regions recorded by a single rank, parsed from one trace file.

    Parameters
    ----------
    name : str
        Path to a ``trace_<rank>.txt`` file; the rank is taken from the name.
    step_start, step_stop : int
        Inclusive range of ``StepTimer`` samples to keep.  A negative value
        (or one beyond the number of recorded steps) leaves that side open.

    Raises
    ------
    ValueError
        If the file name does not encode a rank, or a sample line appears
        before the first ``Region:`` header.
    """

    def __init__(self, name, step_start, step_stop):
        self.step_start = step_start
        self.step_stop = step_stop

        # Match on the basename only, with the dot escaped and the match
        # anchored, so directory names cannot leak into the rank.
        match = re.search(r"trace_(\d+)\.txt$", os.path.basename(name))
        if match is None:
            raise ValueError(
                "cannot determine rank from trace file name: {!r}".format(name)
            )
        self.rank = int(match.group(1))

        self.regions = {}
        current = None
        with open(name) as f:
            for raw in f:
                text = raw.strip()
                if not text:
                    continue  # tolerate blank lines
                if text.split()[0] == "Region:":
                    # Several labels may reduce to the same short name (e.g. two
                    # specializations of one function); merge their samples
                    # instead of discarding the earlier ones.
                    current = self.regions.setdefault(
                        self._region_name(text), Region()
                    )
                    continue
                if current is None:
                    raise ValueError(
                        "sample line found before any 'Region:' header in "
                        "{!r}".format(name)
                    )
                current.add_samples(text)

        # Translate the requested step range into a time window using the
        # per-step samples of the "StepTimer" region, when it was recorded.
        step_start_time = 0.0
        step_stop_time = float("inf")
        steps = self.regions.get("StepTimer")
        if steps is not None:
            nsteps = len(steps.start)
            if 0 <= step_start < nsteps:
                step_start_time = steps.start[step_start]
            if 0 <= step_stop < nsteps:
                step_stop_time = steps.start[step_stop] + steps.duration[step_stop]
        for key, val in self.regions.items():
            if key == "StepTimer":
                continue
            val.trim(step_start_time, step_stop_time)

    @staticmethod
    def _region_name(header):
        """Return the display name for a ``Region: <label>`` header line.

        Labels of the form ``a::b::rest`` are shortened to ``rest``; any other
        label (e.g. ``StepTimer``) is used unchanged.
        """
        label = header[len("Region:") :].strip()
        parts = label.split("::")
        if len(parts) >= 3:
            return "::".join(parts[2:])
        return label

    def region_names(self):
        """Return the region names in the order they were first seen."""
        return list(self.regions.keys())

    def plot_trace(self, ax, colorMap, hatchMap):
        """Draw every region except ``StepTimer`` as bars on row ``self.rank``."""
        for key, val in self.regions.items():
            if key == "StepTimer":
                continue
            ax.barh(
                self.rank,
                val.duration,
                left=val.start,
                height=0.5,
                label=key,
                color=colorMap[key],
                hatch=hatchMap[key],
            )


def main(files, step_start, step_stop):
    """Parse all trace files and show them in a single timeline figure."""
    # Imported here so the parsing classes above stay usable on machines
    # without matplotlib or a display backend.
    import matplotlib.pyplot as plt

    trace = []
    for f in files:
        print("Getting trace", f, end="")
        trace.append(Trace(f, step_start, step_stop))
        print(" done!")

    # Union of region names over all ranks, in first-seen order.
    all_funcs = OrderedDict()
    for t in trace:
        for key in t.region_names():
            all_funcs[key] = ""

    num_colors = len(all_funcs)
    cm = plt.get_cmap("tab20")
    hatch = ["", "--", "/", "\\", "+", "x"]
    num_hatches = len(hatch)
    colorMap = {}
    hatchMap = {}
    for cindex, func in enumerate(all_funcs):
        colorMap[func] = cm((cindex + 0.5) / num_colors)
        hatchMap[func] = hatch[cindex % num_hatches]

    fig, ax = plt.subplots(figsize=(18, 12))
    for t in trace:
        print("Plotting trace", t.rank, end="")
        t.plot_trace(ax, colorMap, hatchMap)
        print(" done!")
    plt.xlabel("Time (s)")
    plt.ylabel("Rank")

    # Every rank adds a handle per region; keep one legend entry per label.
    handles, labels = plt.gca().get_legend_handles_labels()
    by_label = OrderedDict(zip(labels, handles))
    ax.legend(
        by_label.values(),
        by_label.keys(),
        loc="upper center",
        bbox_to_anchor=(0, -0.02, 1, -0.02),
        ncol=3,
    )
    plt.tight_layout()
    plt.show()


if __name__ == "__main__":
    args = parser.parse_args()
    main(args.files, args.step_start, args.step_stop)
utils/instrument.cpp utils/instrument.hpp utils/loop_utils.hpp utils/morton_number.hpp diff --git a/src/amr_criteria/refinement_package.cpp b/src/amr_criteria/refinement_package.cpp index 57542e9e5775..b3422eb62bdb 100644 --- a/src/amr_criteria/refinement_package.cpp +++ b/src/amr_criteria/refinement_package.cpp @@ -156,6 +156,7 @@ void SetRefinement_(MeshBlockData *rc) { template <> TaskStatus Tag(MeshBlockData *rc) { + PARTHENON_TRACE PARTHENON_INSTRUMENT SetRefinement_(rc); return TaskStatus::complete; @@ -163,6 +164,7 @@ TaskStatus Tag(MeshBlockData *rc) { template <> TaskStatus Tag(MeshData *rc) { + PARTHENON_TRACE PARTHENON_INSTRUMENT for (int i = 0; i < rc->NumBlocks(); i++) { SetRefinement_(rc->GetBlockData(i).get()); diff --git a/src/bvals/boundary_conditions.cpp b/src/bvals/boundary_conditions.cpp index f453f256bedd..cdf980070fb7 100644 --- a/src/bvals/boundary_conditions.cpp +++ b/src/bvals/boundary_conditions.cpp @@ -58,6 +58,7 @@ TaskStatus ApplyBoundaryConditionsMD(std::shared_ptr> &pmd) { TaskStatus ApplyBoundaryConditionsOnCoarseOrFineMD(std::shared_ptr> &pmd, bool coarse) { + PARTHENON_TRACE for (int b = 0; b < pmd->NumBlocks(); ++b) ApplyBoundaryConditionsOnCoarseOrFine(pmd->GetBlockData(b), coarse); return TaskStatus::complete; diff --git a/src/bvals/comms/boundary_communication.cpp b/src/bvals/comms/boundary_communication.cpp index 15577b353055..340b12ebf56b 100644 --- a/src/bvals/comms/boundary_communication.cpp +++ b/src/bvals/comms/boundary_communication.cpp @@ -47,6 +47,7 @@ using namespace loops::shorthands; template TaskStatus SendBoundBufs(std::shared_ptr> &md) { + PARTHENON_TRACE PARTHENON_INSTRUMENT Mesh *pmesh = md->GetMeshPointer(); @@ -169,6 +170,7 @@ SendBoundBufs(std::shared_ptr> &); template TaskStatus StartReceiveBoundBufs(std::shared_ptr> &md) { + PARTHENON_TRACE PARTHENON_INSTRUMENT Mesh *pmesh = md->GetMeshPointer(); auto &cache = md->GetBvarsCache().GetSubCache(bound_type, false); @@ -191,6 +193,7 @@ 
StartReceiveBoundBufs(std::shared_ptr> &) template TaskStatus ReceiveBoundBufs(std::shared_ptr> &md) { + PARTHENON_TRACE PARTHENON_INSTRUMENT Mesh *pmesh = md->GetMeshPointer(); @@ -235,6 +238,7 @@ ReceiveBoundBufs(std::shared_ptr> &); template TaskStatus SetBounds(std::shared_ptr> &md) { + PARTHENON_TRACE PARTHENON_INSTRUMENT Mesh *pmesh = md->GetMeshPointer(); @@ -319,6 +323,7 @@ template TaskStatus SetBounds(std::shared_ptr TaskStatus ProlongateBounds(std::shared_ptr> &md) { + PARTHENON_TRACE PARTHENON_INSTRUMENT Mesh *pmesh = md->GetMeshPointer(); diff --git a/src/bvals/comms/flux_correction.cpp b/src/bvals/comms/flux_correction.cpp index ff76bcba0014..ed3603a0779f 100644 --- a/src/bvals/comms/flux_correction.cpp +++ b/src/bvals/comms/flux_correction.cpp @@ -39,6 +39,7 @@ namespace parthenon { using namespace impl; TaskStatus LoadAndSendFluxCorrections(std::shared_ptr> &md) { + PARTHENON_TRACE PARTHENON_INSTRUMENT Mesh *pmesh = md->GetMeshPointer(); @@ -118,6 +119,7 @@ TaskStatus LoadAndSendFluxCorrections(std::shared_ptr> &md) { } TaskStatus StartReceiveFluxCorrections(std::shared_ptr> &md) { + PARTHENON_TRACE PARTHENON_INSTRUMENT Mesh *pmesh = md->GetMeshPointer(); auto &cache = md->GetBvarsCache().GetSubCache(BoundaryType::flxcor_recv, false); @@ -132,6 +134,7 @@ TaskStatus StartReceiveFluxCorrections(std::shared_ptr> &md) { } TaskStatus ReceiveFluxCorrections(std::shared_ptr> &md) { + PARTHENON_TRACE PARTHENON_INSTRUMENT Mesh *pmesh = md->GetMeshPointer(); @@ -150,6 +153,7 @@ TaskStatus ReceiveFluxCorrections(std::shared_ptr> &md) { } TaskStatus SetFluxCorrections(std::shared_ptr> &md) { + PARTHENON_TRACE PARTHENON_INSTRUMENT Mesh *pmesh = md->GetMeshPointer(); diff --git a/src/config.hpp.in b/src/config.hpp.in index 454f31976104..1f9595cf31c2 100644 --- a/src/config.hpp.in +++ b/src/config.hpp.in @@ -54,6 +54,9 @@ // define ENABLE_LB_TIMERS or not at all #cmakedefine ENABLE_LB_TIMERS +// define ENABLE_TRACE or not at all +#cmakedefine ENABLE_TRACE + // 
define PARTHENON_ENABLE_ASCENT or not at all #cmakedefine PARTHENON_ENABLE_ASCENT diff --git a/src/driver/driver.cpp b/src/driver/driver.cpp index b12a8dde3f9b..968642221ed9 100644 --- a/src/driver/driver.cpp +++ b/src/driver/driver.cpp @@ -78,6 +78,7 @@ DriverStatus EvolutionDriver::Execute() { { // Main t < tmax loop region PARTHENON_INSTRUMENT while (tm.KeepGoing()) { + PARTHENON_TRACE_REGION("StepTimer") if (Globals::my_rank == 0) OutputCycleDiagnostics(); pmesh->PreStepUserWorkInLoop(pmesh, pinput, tm); @@ -160,6 +161,7 @@ void EvolutionDriver::PostExecute(DriverStatus status) { } void EvolutionDriver::InitializeBlockTimeStepsAndBoundaries() { + PARTHENON_TRACE // calculate the first time step using Block function for (auto &pmb : pmesh->block_list) { Update::EstimateTimestep(pmb->meshblock_data.Get().get()); @@ -180,6 +182,7 @@ void EvolutionDriver::InitializeBlockTimeStepsAndBoundaries() { // \brief function that loops over all MeshBlocks and find new timestep void EvolutionDriver::SetGlobalTimeStep() { + PARTHENON_TRACE // don't allow dt to grow by more than 2x // consider making this configurable in the input if (tm.dt < 0.1 * std::numeric_limits::max()) { diff --git a/src/interface/update.cpp b/src/interface/update.cpp index 96855ee2426b..e07df74aa072 100644 --- a/src/interface/update.cpp +++ b/src/interface/update.cpp @@ -36,6 +36,8 @@ namespace Update { template <> TaskStatus FluxDivergence(MeshBlockData *in, MeshBlockData *dudt_cont) { + PARTHENON_TRACE + PARTHENON_INSTRUMENT std::shared_ptr pmb = in->GetBlockPointer(); const IndexDomain interior = IndexDomain::interior; @@ -61,6 +63,8 @@ TaskStatus FluxDivergence(MeshBlockData *in, MeshBlockData *dudt_con template <> TaskStatus FluxDivergence(MeshData *in_obj, MeshData *dudt_obj) { + PARTHENON_TRACE + PARTHENON_INSTRUMENT const IndexDomain interior = IndexDomain::interior; std::vector flags({Metadata::WithFluxes, Metadata::Cell}); @@ -88,6 +92,8 @@ template <> TaskStatus 
UpdateWithFluxDivergence(MeshBlockData *u0_data, MeshBlockData *u1_data, const Real gam0, const Real gam1, const Real beta_dt) { + PARTHENON_TRACE + PARTHENON_INSTRUMENT std::shared_ptr pmb = u0_data->GetBlockPointer(); const IndexDomain interior = IndexDomain::interior; @@ -116,6 +122,8 @@ template <> TaskStatus UpdateWithFluxDivergence(MeshData *u0_data, MeshData *u1_data, const Real gam0, const Real gam1, const Real beta_dt) { + PARTHENON_TRACE + PARTHENON_INSTRUMENT const IndexDomain interior = IndexDomain::interior; std::vector flags({Metadata::WithFluxes, Metadata::Cell}); @@ -141,6 +149,7 @@ TaskStatus UpdateWithFluxDivergence(MeshData *u0_data, MeshData *u1_ } TaskStatus SparseDealloc(MeshData *md) { + PARTHENON_TRACE PARTHENON_INSTRUMENT if (!Globals::sparse_config.enabled || (md->NumBlocks() == 0)) { return TaskStatus::complete; diff --git a/src/interface/update.hpp b/src/interface/update.hpp index 21f035caefa3..865db07262ed 100644 --- a/src/interface/update.hpp +++ b/src/interface/update.hpp @@ -70,6 +70,7 @@ TaskStatus UpdateWithFluxDivergence(T *data_u0, T *data_u1, const Real gam0, template TaskStatus WeightedSumData(const F &flags, T *in1, T *in2, const Real w1, const Real w2, T *out) { + PARTHENON_TRACE PARTHENON_INSTRUMENT const auto &x = in1->PackVariables(flags); const auto &y = in2->PackVariables(flags); @@ -95,6 +96,7 @@ TaskStatus CopyData(const F &flags, T *in, T *out) { template TaskStatus SetDataToConstant(const F &flags, T *data, const Real val) { + PARTHENON_TRACE PARTHENON_INSTRUMENT const auto &x = data->PackVariables(flags); parthenon::par_for( @@ -146,6 +148,7 @@ template TaskStatus Update2S(const F &flags, T *s0_data, T *s1_data, T *rhs_data, const LowStorageIntegrator *pint, Real dt, int stage, bool update_s1) { + PARTHENON_TRACE PARTHENON_INSTRUMENT const auto &s0 = s0_data->PackVariables(flags); const auto &s1 = s1_data->PackVariables(flags); @@ -191,6 +194,7 @@ TaskStatus SumButcher(const F &flags, std::shared_ptr base_data, 
std::vector> stage_data, std::shared_ptr out_data, const ButcherIntegrator *pint, Real dt, int stage) { + PARTHENON_TRACE PARTHENON_INSTRUMENT const auto &out = out_data->PackVariables(flags); const auto &in = base_data->PackVariables(flags); @@ -234,6 +238,7 @@ template TaskStatus UpdateButcher(const F &flags, std::vector> stage_data, std::shared_ptr out_data, const ButcherIntegrator *pint, Real dt) { + PARTHENON_TRACE PARTHENON_INSTRUMENT const auto &out = out_data->PackVariables(flags); @@ -267,6 +272,7 @@ TaskStatus UpdateButcherIndependent(std::vector> stage_data, template TaskStatus EstimateTimestep(T *rc) { + PARTHENON_TRACE PARTHENON_INSTRUMENT Real dt_min = std::numeric_limits::max(); for (const auto &pkg : rc->GetParentPointer()->packages.AllPackages()) { @@ -279,6 +285,7 @@ TaskStatus EstimateTimestep(T *rc) { template TaskStatus PreCommFillDerived(T *rc) { + PARTHENON_TRACE PARTHENON_INSTRUMENT auto pm = rc->GetParentPointer(); for (const auto &pkg : pm->packages.AllPackages()) { @@ -289,6 +296,7 @@ TaskStatus PreCommFillDerived(T *rc) { template TaskStatus FillDerived(T *rc) { + PARTHENON_TRACE PARTHENON_INSTRUMENT auto pm = rc->GetParentPointer(); { // PreFillDerived region @@ -314,6 +322,7 @@ TaskStatus FillDerived(T *rc) { template TaskStatus InitNewlyAllocatedVars(T *rc) { + PARTHENON_TRACE PARTHENON_INSTRUMENT if (!rc->AllVariablesInitialized()) { const IndexDomain interior = IndexDomain::interior; diff --git a/src/mesh/amr_loadbalance.cpp b/src/mesh/amr_loadbalance.cpp index 49219de78bab..68ebddab4601 100644 --- a/src/mesh/amr_loadbalance.cpp +++ b/src/mesh/amr_loadbalance.cpp @@ -317,6 +317,7 @@ bool TryRecvSameToSame(int lid_recv, int send_rank, Variable *var, MeshBlo void Mesh::LoadBalancingAndAdaptiveMeshRefinement(ParameterInput *pin, ApplicationInput *app_in) { + PARTHENON_TRACE PARTHENON_INSTRUMENT int nnew = 0, ndel = 0; diff --git a/src/outputs/outputs.cpp b/src/outputs/outputs.cpp index 14eac0dd20fa..d8cf2caedfee 100644 --- 
a/src/outputs/outputs.cpp +++ b/src/outputs/outputs.cpp @@ -402,6 +402,7 @@ void OutputType::ClearOutputData() { void Outputs::MakeOutputs(Mesh *pm, ParameterInput *pin, SimTime *tm, const SignalHandler::OutputSignal signal) { + PARTHENON_TRACE PARTHENON_INSTRUMENT bool first = true; OutputType *ptype = pfirst_type_; diff --git a/src/parthenon_manager.cpp b/src/parthenon_manager.cpp index 2329553bfe76..46fc3e6f1a4f 100644 --- a/src/parthenon_manager.cpp +++ b/src/parthenon_manager.cpp @@ -36,6 +36,7 @@ #include "outputs/output_utils.hpp" #include "outputs/parthenon_hdf5.hpp" #include "utils/error_checking.hpp" +#include "utils/instrument.hpp" #include "utils/utils.hpp" namespace parthenon { @@ -75,6 +76,13 @@ ParthenonStatus ParthenonManager::ParthenonInitEnv(int argc, char *argv[]) { Kokkos::initialize(argc, argv); +#ifdef ENABLE_TRACE +#ifdef MPI_PARALLEL + MPI_Barrier(MPI_COMM_WORLD); +#endif + Trace::Initialize(); +#endif + // pgrete: This is a hack to disable allocation tracking until the Kokkos // tools provide a more fine grained control out of the box. bool unused; @@ -199,6 +207,9 @@ void ParthenonManager::ParthenonInitPackagesAndMesh() { ParthenonStatus ParthenonManager::ParthenonFinalize() { pmesh.reset(); Kokkos::finalize(); +#ifdef ENABLE_TRACE + Trace::Report(); +#endif #ifdef MPI_PARALLEL MPI_Finalize(); #endif diff --git a/src/utils/instrument.cpp b/src/utils/instrument.cpp new file mode 100644 index 000000000000..8f62ea4275b9 --- /dev/null +++ b/src/utils/instrument.cpp @@ -0,0 +1,21 @@ +//======================================================================================== +// (C) (or copyright) 2023. Triad National Security, LLC. All rights reserved. +// +// This program was produced under U.S. Government contract 89233218CNA000001 for Los +// Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC +// for the U.S. Department of Energy/National Nuclear Security Administration. 
/// Scope-based tracer: records the wall-clock interval between construction and
/// destruction of each instance under a text label, and can dump all recorded
/// intervals to a per-rank text file.
///
/// When ENABLE_TRACE is not defined the class is an empty stub that accepts any
/// constructor arguments, so the PARTHENON_TRACE* macros cost nothing.
///
/// NOTE(review): the sample store is a plain static map with no locking, so this
/// presumably must only be used from one host thread at a time -- confirm.
class Trace {
 public:
#ifdef ENABLE_TRACE
  // steady_clock is monotonic, which is what interval measurement needs;
  // high_resolution_clock may alias the adjustable system clock.
  using clock = std::chrono::steady_clock;
  using seconds = std::chrono::duration<double>;
  using tp_t = std::chrono::time_point<clock>;

  /// Start a sample labelled from a source location (file, line, function name).
  Trace(const std::string &file, const int line, const std::string &name)
      : label_(build_auto_label(file, line, name)), start_(clock::now()) {}

  /// Start a sample with an explicit region name.
  explicit Trace(const std::string &name) : label_(name), start_(clock::now()) {}

  /// Finish the sample. Start and stop are stored together so that nested or
  /// still-open scopes sharing a label can never be paired with each other.
  ~Trace() {
    const tp_t stop = clock::now();
    auto &samples = time_[label_];
    samples.push_back(start_);
    samples.push_back(stop);
  }

  /// Set the reference time that all reported timestamps are measured from.
  static void Initialize() { t0_ = clock::now(); }

  /// Write every recorded sample to "trace_<rank>.txt": one "Region: <label>"
  /// header per label followed by one "<start> <stop>" line (seconds since
  /// Initialize()) per sample.
  static void Report() {
    std::stringstream fname;
    fname << "trace_" << Globals::my_rank << ".txt";
    std::ofstream f(fname.str());
    // Fixed notation with nanosecond digits; the default of 6 significant
    // digits would drop sub-millisecond detail once times reach 100 s.
    f.setf(std::ios::fixed, std::ios::floatfield);
    f.precision(9);
    for (const auto &pair : time_) {
      f << "Region: " << pair.first << '\n';
      const auto &times = pair.second;
      // Timestamps are stored as (start, stop) pairs; the bound also guards
      // against reading past the end should a trailing entry be unpaired.
      for (std::size_t i = 0; i + 1 < times.size(); i += 2) {
        f << seconds(times[i] - t0_).count() << " " << seconds(times[i + 1] - t0_).count()
          << '\n';
      }
    }
    f.close();
  }

 private:
  const std::string label_;
  const tp_t start_;
  static std::map<std::string, std::vector<tp_t>> time_;
  static tp_t t0_;
#else
  // stub out if not configured to do tracing
  template <class... Args>
  Trace(Args &&...) {}
#endif
};