From a9215c6ecc9b268c522386840c869803768e3540 Mon Sep 17 00:00:00 2001 From: "Niall Douglas (s [underscore] sourceforge {at} nedprod [dot] com)" Date: Fri, 10 Sep 2021 15:11:23 +0100 Subject: [PATCH] current_process_memory_usage: Fix bug in accurate committed private calculation routine on Linux. --- include/llfio/v2.0/detail/impl/map_handle.ipp | 11 +++++++---- include/llfio/v2.0/detail/impl/posix/utils.ipp | 10 +++++----- include/llfio/v2.0/utils.hpp | 6 ++++-- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/include/llfio/v2.0/detail/impl/map_handle.ipp b/include/llfio/v2.0/detail/impl/map_handle.ipp index 5e1e625d2..db18b0779 100644 --- a/include/llfio/v2.0/detail/impl/map_handle.ipp +++ b/include/llfio/v2.0/detail/impl/map_handle.ipp @@ -70,7 +70,7 @@ namespace detail std::atomic do_not_store_failed_count{0}; #endif - ~map_handle_cache_t() { trim_cache(std::chrono::steady_clock::now(), (size_t)-1); } + ~map_handle_cache_t() { trim_cache(std::chrono::steady_clock::now(), (size_t) -1); } using _base::size; void *get(size_t bytes, size_t page_size) @@ -131,7 +131,8 @@ namespace detail if(-1 == ::munmap(p->addr, _bytes)) #endif { - // fprintf(stderr, "munmap failed with %s. addr was %p bytes was %zu. page_size_shift was %zu\n", strerror(errno), p->addr, _bytes, page_size_shift); + // fprintf(stderr, "munmap failed with %s. addr was %p bytes was %zu. page_size_shift was %zu\n", strerror(errno), p->addr, _bytes, + // page_size_shift); LLFIO_LOG_FATAL(nullptr, "FATAL: map_handle cache failed to trim a map! If on Linux, you may have exceeded the " "64k VMA process limit, set the LLFIO_DEBUG_LINUX_MUNMAP macro at the top of posix/map_handle.ipp to cause dumping of VMAs to " @@ -238,16 +239,18 @@ bool map_handle::_recycle_map() noexcept return false; } #else +#ifdef __linux__ if(memory_accounting() == memory_accounting_kind::commit_charge) +#endif { if(!do_mmap(_v, _addr, MAP_FIXED, nullptr, _pagesize, _length, 0, section_handle::flag::none | section_handle::flag::nocommit)) { return false; } } +#ifdef __linux__ else { -#ifdef __linux__ if(c.do_not_store_failed_count.load(std::memory_order_relaxed) < 10) { auto r = do_not_store({_addr, _length}); @@ -256,8 +259,8 @@ bool map_handle::_recycle_map() noexcept c.do_not_store_failed_count.fetch_add(1, std::memory_order_relaxed); } } -#endif } +#endif #endif return c.add(_reservation, _pagesize, _addr); } diff --git a/include/llfio/v2.0/detail/impl/posix/utils.ipp b/include/llfio/v2.0/detail/impl/posix/utils.ipp index 19f319c38..fe3aa5d24 100644 --- a/include/llfio/v2.0/detail/impl/posix/utils.ipp +++ b/include/llfio/v2.0/detail/impl/posix/utils.ipp @@ -459,15 +459,15 @@ namespace utils // std::cerr << "Anon entries:"; for(auto &i : anon_entries) { - OUTCOME_TRY(auto &&size, parse(i, "\nSize:")); - OUTCOME_TRY(auto &&rss, parse(i, "\nRss:")); - OUTCOME_TRY(auto &&anonymous, parse(i, "\nAnonymous:")); - OUTCOME_TRY(auto &&lazyfree, parse(i, "\nLazyFree:")); + OUTCOME_TRY(auto &&size, parse(i, "\nSize:")); // amount committed + OUTCOME_TRY(auto &&rss, parse(i, "\nRss:")); // amount paged in + OUTCOME_TRY(auto &&anonymous, parse(i, "\nAnonymous:")); // amount actually dirtied + OUTCOME_TRY(auto &&lazyfree, parse(i, "\nLazyFree:")); // amount "decommitted" on Linux to avoid a VMA split if(size != (uint64_t) -1 && rss != (uint64_t) -1 && anonymous != (uint64_t) -1) { ret.total_address_space_in_use += size; ret.total_address_space_paged_in += rss; - ret.private_committed += anonymous; + ret.private_committed += size; if(lazyfree != (uint64_t) -1) { ret.total_address_space_paged_in -= lazyfree; diff --git a/include/llfio/v2.0/utils.hpp b/include/llfio/v2.0/utils.hpp index 4b8501da8..0abec3e83 100644 --- a/include/llfio/v2.0/utils.hpp +++ b/include/llfio/v2.0/utils.hpp @@ -253,8 +253,10 @@ namespace utils `process_memory_usage::want::private_committed_inaccurate` can yield significant performance gains. If you set `process_memory_usage::want::private_committed_inaccurate`, we use `/proc/pid/smaps_rollup` and `/proc/pid/maps` to calculate the results. This - cannot distinguish between regions with the accounted - flag enabled or disabled. By default, this fast path is enabled. + cannot distinguish between regions with the accounted flag enabled or disabled, and + be aware that glibc's `malloc()` for some inexplicable reason doesn't set the + accounted flag on regions it commits, so the inaccurate flag will always yield + higher numbers for private commited on Linux. By default, this fast path is enabled. \note `/proc/pid/smaps_rollup` was added in Linux kernel 3.16, so the default specifying `process_memory_usage::want::private_committed_inaccurate` will always fail on Linux