From dea243266d54b570ef4b48936fd0fdf5b66e55c2 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 21 Aug 2024 11:00:47 -0500 Subject: [PATCH] TinyProfiler:: Add more runtime parameters (#4100) Add the following ParmParse parameters to TinyProfiler: * tiny_profiler.enabled This parameter can be used to disable tiny profiling including CArena memory profiling at run time. * tiny_profiler.memprof_enabled This parameter can be used to disable tiny profiler's memory arena profiling at run time. If tiny_profiler.enabled is false, this parameter has no effects. * tiny_profiler.output_file If this parameter is empty, the output of tiny profiling is dumped on the default out stream of AMReX. If it's not empty, it specifies the file name for the output. Note that `/dev/null` is a special name that mean a null file. --- .../source/RuntimeParameters.rst | 32 +++ Src/Base/AMReX_TinyProfiler.H | 8 +- Src/Base/AMReX_TinyProfiler.cpp | 250 ++++++++++++------ 3 files changed, 213 insertions(+), 77 deletions(-) diff --git a/Docs/sphinx_documentation/source/RuntimeParameters.rst b/Docs/sphinx_documentation/source/RuntimeParameters.rst index d6ac1014de..c57c74c8fb 100644 --- a/Docs/sphinx_documentation/source/RuntimeParameters.rst +++ b/Docs/sphinx_documentation/source/RuntimeParameters.rst @@ -1204,3 +1204,35 @@ enabled. which are unnecessary for correctness, could potentially degrade the performance. +.. py:data:: tiny_profiler.enabled + :type: bool + :value: true + + .. versionadded:: 24.09 + Runtime parameter `tiny_profiler.enabled``. + + This parameter can be used to disable tiny profiling including + :cpp:`CArena` memory profiling at run time. + +.. py:data:: tiny_profiler.memprof_enabled + :type: bool + :value: true + + .. versionadded:: 24.09 + Runtime parameter ``tiny_profiler.memprof_enabled``. + + This parameter can be used to disable :cpp:`CArena` memory profiling at + run time. If ``tiny_profiler.enabled`` is false, this parameter has no + effects. + +.. py:data:: tiny_profiler.output_file + :type: string + :value: [empty] + + .. versionadded:: 24.09 + Runtime parameter ``tiny_profiler.output_file``. + + If this parameter is empty, the output of tiny profiling is dumped on the + default out stream of AMReX. If it's not empty, it specifies the file + name for the output. Note that ``/dev/null`` is a special name that mean + a null file. diff --git a/Src/Base/AMReX_TinyProfiler.H b/Src/Base/AMReX_TinyProfiler.H index f995eb4a2c..0228949beb 100644 --- a/Src/Base/AMReX_TinyProfiler.H +++ b/Src/Base/AMReX_TinyProfiler.H @@ -139,11 +139,15 @@ private: static int n_print_tabs; static int verbose; static double print_threshold; + static bool enabled; + static bool memprof_enabled; + static std::string output_file; - static void PrintStats (std::map& regstats, double dt_max); + static void PrintStats (std::map& regstats, double dt_max, + std::ostream* os); static void PrintMemStats (std::map& memstats, std::string const& memname, double dt_max, - double t_final); + double t_final, std::ostream* os); }; class TinyProfileRegion diff --git a/Src/Base/AMReX_TinyProfiler.cpp b/Src/Base/AMReX_TinyProfiler.cpp index 12d394abd0..db92274578 100644 --- a/Src/Base/AMReX_TinyProfiler.cpp +++ b/Src/Base/AMReX_TinyProfiler.cpp @@ -50,6 +50,9 @@ bool TinyProfiler::device_synchronize_around_region = false; int TinyProfiler::n_print_tabs = 0; int TinyProfiler::verbose = 0; double TinyProfiler::print_threshold = 1.; +bool TinyProfiler::enabled = true; +bool TinyProfiler::memprof_enabled = true; +std::string TinyProfiler::output_file; namespace { constexpr char mainregion[] = "main"; @@ -87,6 +90,8 @@ TinyProfiler::~TinyProfiler () void TinyProfiler::start () noexcept { + if (!enabled) { return; } + memory_start(); #ifdef AMREX_USE_OMP @@ -136,6 +141,8 @@ TinyProfiler::start () noexcept for (int itab = 0; itab < n_print_tabs; ++itab) { whitespace += " "; } + // If we try to print to output_file here, it may not be thread + // safe. Also note that this is controlled by verbose already. amrex::Print() << whitespace << "TP: Entering " << fname << '\n'; } } @@ -144,6 +151,8 @@ TinyProfiler::start () noexcept void TinyProfiler::stop () noexcept { + if (!enabled) { return; } + memory_stop(); #ifdef AMREX_USE_OMP @@ -205,13 +214,18 @@ TinyProfiler::stop () noexcept whitespace += " "; } --n_print_tabs; + // If we try to print to output_file here, it may not be thread + // safe. Also note that this is controlled by verbose already. amrex::Print() << whitespace << "TP: Leaving " << fname << '\n'; } } } void -TinyProfiler::memory_start () const noexcept { +TinyProfiler::memory_start () const noexcept +{ + if (!memprof_enabled) { return; } + // multiple omp threads may share the same TinyProfiler object so this function must be const // it is NOT allowed to double start a section #ifdef AMREX_USE_OMP @@ -225,7 +239,10 @@ TinyProfiler::memory_start () const noexcept { } void -TinyProfiler::memory_stop () const noexcept { +TinyProfiler::memory_stop () const noexcept +{ + if (!memprof_enabled) { return; } + // multiple omp threads may share the same TinyProfiler object so this function must be const // it IS allowed to double stop a section #ifdef AMREX_USE_OMP @@ -244,7 +261,10 @@ TinyProfiler::memory_stop () const noexcept { } MemStat* -TinyProfiler::memory_alloc (std::size_t nbytes, std::map& memstats) noexcept { +TinyProfiler::memory_alloc (std::size_t nbytes, std::map& memstats) noexcept +{ + if (!memprof_enabled) { return nullptr; } + // this function is not thread safe for the same memstats // the caller of this function (CArena::alloc) has a mutex MemStat* stat = nullptr; @@ -270,7 +290,10 @@ TinyProfiler::memory_alloc (std::size_t nbytes, std::map& } void -TinyProfiler::memory_free (std::size_t nbytes, MemStat* stat) noexcept { +TinyProfiler::memory_free (std::size_t nbytes, MemStat* stat) noexcept +{ + if (!memprof_enabled) { return; } + // this function is not thread safe for the same stat // the caller of this function (CArena::free) has a mutex if (stat) { @@ -284,8 +307,6 @@ TinyProfiler::memory_free (std::size_t nbytes, MemStat* stat) noexcept { void TinyProfiler::Initialize () noexcept { - regionstack.emplace_back(mainregion); - t_init = amrex::second(); { amrex::ParmParse pp("tiny_profiler"); pp.queryAdd("device_synchronize_around_region", device_synchronize_around_region); @@ -295,12 +316,39 @@ TinyProfiler::Initialize () noexcept // Specify the maximum percentage of inclusive time // that the "Other" section in the output can have (defaultĀ 1%) pp.queryAdd("print_threshold", print_threshold); + + pp.queryAdd("enabled", enabled); + pp.queryAdd("output_file", output_file); + } + + if (!enabled) { return; } + + if (ParallelDescriptor::IOProcessor()) { + static bool first = true; + if (first && !output_file.empty() && output_file != "/dev/null") { + if (FileSystem::Exists(output_file)) { + FileSystem::Remove(output_file); + } + first = false; + } } + + regionstack.emplace_back(mainregion); + t_init = amrex::second(); } void TinyProfiler::MemoryInitialize () noexcept { + { + amrex::ParmParse pp("tiny_profiler"); + pp.queryAdd("enabled", enabled); + pp.queryAdd("memprof_enabled", memprof_enabled); + memprof_enabled = memprof_enabled && enabled; + } + + if (!memprof_enabled) { return; } + #ifdef AMREX_USE_OMP mem_stack_thread_private.resize(omp_get_max_threads()); #endif @@ -309,6 +357,8 @@ TinyProfiler::MemoryInitialize () noexcept void TinyProfiler::Finalize (bool bFlushing) noexcept { + if (!enabled) { return; } + static bool finalized = false; if (!bFlushing) { // If flushing, don't make this the last time! if (finalized) { @@ -334,11 +384,25 @@ TinyProfiler::Finalize (bool bFlushing) noexcept ParallelReduce::Sum(dt_avg, ioproc, ParallelDescriptor::Communicator()); dt_avg /= double(nprocs); - if (ParallelDescriptor::IOProcessor()) + std::ofstream ofs; + std::ostream* os = nullptr; + std::streamsize oldprec = 0; + if (ParallelDescriptor::IOProcessor()) { + if (output_file.empty()) { + os = &(amrex::OutStream()); + } else if (output_file != "/dev/null") { + ofs.open(output_file, std::ios_base::app); + if (!ofs.is_open()) { + amrex::Error("TinyProfiler failed to open "+output_file); + } + os = static_cast(&ofs); + } + } + + if (os) { - amrex::Print() << "\n\n"; - amrex::Print().SetPrecision(4) - <<"TinyProfiler total time across processes [min...avg...max]: " + os->precision(4); + *os << "\n\nTinyProfiler total time across processes [min...avg...max]: " << dt_min << " ... " << dt_avg << " ... " << dt_max << "\n"; } @@ -362,19 +426,27 @@ TinyProfiler::Finalize (bool bFlushing) noexcept } } - PrintStats(lstatsmap[mainregion], dt_max); + PrintStats(lstatsmap[mainregion], dt_max, os); for (auto& kv : lstatsmap) { if (kv.first != mainregion) { - amrex::Print() << "\n\nBEGIN REGION " << kv.first << "\n"; - PrintStats(kv.second, dt_max); - amrex::Print() << "END REGION " << kv.first << "\n"; + if (os) { + *os << "\n\nBEGIN REGION " << kv.first << "\n"; + } + PrintStats(kv.second, dt_max, os); + if (os) { + *os << "END REGION " << kv.first << "\n"; + } } } + + if(os) { os->precision(oldprec); } } void TinyProfiler::MemoryFinalize (bool bFlushing) noexcept { + if (!memprof_enabled) { return; } + // This function must be called BEFORE the profiled arenas are deleted static bool finalized = false; @@ -391,20 +463,39 @@ TinyProfiler::MemoryFinalize (bool bFlushing) noexcept int ioproc = ParallelDescriptor::IOProcessorNumber(); ParallelReduce::Max(dt_max, ioproc, ParallelDescriptor::Communicator()); + std::ofstream ofs; + std::ostream* os = nullptr; + std::streamsize oldprec = 0; + if (ParallelDescriptor::IOProcessor()) { + if (output_file.empty()) { + os = &(amrex::OutStream()); + } else if (output_file != "/dev/null") { + ofs.open(output_file, std::ios_base::app); + if (!ofs.is_open()) { + amrex::Error("TinyProfiler failed to open "+output_file); + } + os = static_cast(&ofs); + } + } + for (std::size_t i = 0; i < all_memstats.size(); ++i) { - PrintMemStats(*(all_memstats[i]), all_memnames[i], dt_max, t_final); + PrintMemStats(*(all_memstats[i]), all_memnames[i], dt_max, t_final, os); } if (!bFlushing) { all_memstats.clear(); all_memnames.clear(); } + + if(os) { os->precision(oldprec); } } void TinyProfiler::RegisterArena (const std::string& memory_name, std::map& memstats) noexcept { + if (!memprof_enabled) { return; } + all_memstats.push_back(&memstats); all_memnames.push_back(memory_name); } @@ -412,6 +503,8 @@ TinyProfiler::RegisterArena (const std::string& memory_name, void TinyProfiler::DeregisterArena (std::map& memstats) noexcept { + if (!memprof_enabled) { return; } + for (std::size_t i = 0; i < all_memstats.size();) { if (all_memstats[i] == &memstats) { all_memstats.erase(all_memstats.begin() + i); // NOLINT @@ -423,7 +516,8 @@ TinyProfiler::DeregisterArena (std::map& memstats) noexcep } void -TinyProfiler::PrintStats (std::map& regstats, double dt_max) +TinyProfiler::PrintStats (std::map& regstats, double dt_max, + std::ostream* os) { // make sure the set of profiled functions is the same on all processes { @@ -497,9 +591,9 @@ TinyProfiler::PrintStats (std::map& regstats, double dt_max) } } - if (ParallelDescriptor::IOProcessor()) + if (ParallelDescriptor::IOProcessor() && os) { - amrex::OutStream() << std::setfill(' ') << std::setprecision(4); + *os << std::setfill(' ') << std::setprecision(4); int wt = 9; int wnc = (int) std::log10 ((double) maxncalls) + 1; @@ -564,34 +658,34 @@ TinyProfiler::PrintStats (std::map& regstats, double dt_max) // make sure "Other" is printed at the end of the list allprocstats.push_back(other_procstat); } - amrex::OutStream() << "\n" << hline << "\n"; - amrex::OutStream() << std::left - << std::setw(maxfnamelen) << "Name" - << std::right - << std::setw(wnc+2) << "NCalls" - << std::setw(wt+2) << "Excl. Min" - << std::setw(wt+2) << "Excl. Avg" - << std::setw(wt+2) << "Excl. Max" - << std::setw(wp+2) << "Max %" - << "\n" << hline << "\n"; + *os << "\n" << hline << "\n"; + *os << std::left + << std::setw(maxfnamelen) << "Name" + << std::right + << std::setw(wnc+2) << "NCalls" + << std::setw(wt+2) << "Excl. Min" + << std::setw(wt+2) << "Excl. Avg" + << std::setw(wt+2) << "Excl. Max" + << std::setw(wp+2) << "Max %" + << "\n" << hline << "\n"; for (const auto & allprocstat : allprocstats) { if (!allprocstat.do_print) { continue; } - amrex::OutStream() << std::setprecision(4) << std::left - << std::setw(maxfnamelen) << allprocstat.fname - << std::right - << std::setw(wnc+2) << allprocstat.navg - << std::setw(wt+2) << allprocstat.dtexmin - << std::setw(wt+2) << allprocstat.dtexavg - << std::setw(wt+2) << allprocstat.dtexmax - << std::setprecision(2) << std::setw(wp+1) << std::fixed - << allprocstat.dtexmax*(100.0/dt_max) << "%"; - amrex::OutStream().unsetf(std::ios_base::fixed); - amrex::OutStream() << "\n"; - } - amrex::OutStream() << hline << "\n"; + *os << std::setprecision(4) << std::left + << std::setw(maxfnamelen) << allprocstat.fname + << std::right + << std::setw(wnc+2) << allprocstat.navg + << std::setw(wt+2) << allprocstat.dtexmin + << std::setw(wt+2) << allprocstat.dtexavg + << std::setw(wt+2) << allprocstat.dtexmax + << std::setprecision(2) << std::setw(wp+1) << std::fixed + << allprocstat.dtexmax*(100.0/dt_max) << "%"; + os->unsetf(std::ios_base::fixed); + *os << "\n"; + } + *os << hline << "\n"; if (print_other_procstat) { allprocstats.pop_back(); } @@ -602,41 +696,41 @@ TinyProfiler::PrintStats (std::map& regstats, double dt_max) // make sure "Other" is printed at the end of the list allprocstats.push_back(other_procstat); } - amrex::OutStream() << "\n" << hline << "\n"; - amrex::OutStream() << std::left - << std::setw(maxfnamelen) << "Name" - << std::right - << std::setw(wnc+2) << "NCalls" - << std::setw(wt+2) << "Incl. Min" - << std::setw(wt+2) << "Incl. Avg" - << std::setw(wt+2) << "Incl. Max" - << std::setw(wp+2) << "Max %" - << "\n" << hline << "\n"; + *os << "\n" << hline << "\n"; + *os << std::left + << std::setw(maxfnamelen) << "Name" + << std::right + << std::setw(wnc+2) << "NCalls" + << std::setw(wt+2) << "Incl. Min" + << std::setw(wt+2) << "Incl. Avg" + << std::setw(wt+2) << "Incl. Max" + << std::setw(wp+2) << "Max %" + << "\n" << hline << "\n"; for (const auto & allprocstat : allprocstats) { if (!allprocstat.do_print) { continue; } - amrex::OutStream() << std::setprecision(4) << std::left - << std::setw(maxfnamelen) << allprocstat.fname - << std::right - << std::setw(wnc+2) << allprocstat.navg - << std::setw(wt+2) << allprocstat.dtinmin - << std::setw(wt+2) << allprocstat.dtinavg - << std::setw(wt+2) << allprocstat.dtinmax - << std::setprecision(2) << std::setw(wp+1) << std::fixed - << allprocstat.dtinmax*(100.0/dt_max) << "%"; - amrex::OutStream().unsetf(std::ios_base::fixed); - amrex::OutStream() << "\n"; - } - amrex::OutStream() << hline << "\n\n"; + *os << std::setprecision(4) << std::left + << std::setw(maxfnamelen) << allprocstat.fname + << std::right + << std::setw(wnc+2) << allprocstat.navg + << std::setw(wt+2) << allprocstat.dtinmin + << std::setw(wt+2) << allprocstat.dtinavg + << std::setw(wt+2) << allprocstat.dtinmax + << std::setprecision(2) << std::setw(wp+1) << std::fixed + << allprocstat.dtinmax*(100.0/dt_max) << "%"; + os->unsetf(std::ios_base::fixed); + *os << "\n"; + } + *os << hline << "\n\n"; } } void -TinyProfiler::PrintMemStats(std::map& memstats, - std::string const& memname, double dt_max, - double t_final) +TinyProfiler::PrintMemStats (std::map& memstats, + std::string const& memname, double dt_max, + double t_final, std::ostream* os) { // make sure the set of profiled functions is the same on all processes { @@ -780,7 +874,7 @@ TinyProfiler::PrintMemStats(std::map& memstats, maxlen[i] += 2; } - if (allstatsstr.size() == 1) { return; } + if (allstatsstr.size() == 1 || !os) { return; } int lenhline = 0; for (auto i : maxlen) { @@ -788,24 +882,26 @@ TinyProfiler::PrintMemStats(std::map& memstats, } const std::string hline(lenhline, '-'); - amrex::OutStream() << memname << " Usage:\n"; - amrex::OutStream() << hline << "\n"; + *os << memname << " Usage:\n"; + *os << hline << "\n"; for (std::size_t i=0; i(x)) << "\n";