current_process_memory_usage: Fix bug in accurate committed private calculation routine on Linux.
ned14 committed Sep 10, 2021
1 parent da688a1 commit a9215c6
Showing 3 changed files with 16 additions and 11 deletions.
11 changes: 7 additions & 4 deletions include/llfio/v2.0/detail/impl/map_handle.ipp
@@ -70,7 +70,7 @@ namespace detail
std::atomic<unsigned> do_not_store_failed_count{0};
#endif

-~map_handle_cache_t() { trim_cache(std::chrono::steady_clock::now(), (size_t)-1); }
+~map_handle_cache_t() { trim_cache(std::chrono::steady_clock::now(), (size_t) -1); }

using _base::size;
void *get(size_t bytes, size_t page_size)
@@ -131,7 +131,8 @@ namespace detail
if(-1 == ::munmap(p->addr, _bytes))
#endif
{
-// fprintf(stderr, "munmap failed with %s. addr was %p bytes was %zu. page_size_shift was %zu\n", strerror(errno), p->addr, _bytes, page_size_shift);
+// fprintf(stderr, "munmap failed with %s. addr was %p bytes was %zu. page_size_shift was %zu\n", strerror(errno), p->addr, _bytes,
+// page_size_shift);
LLFIO_LOG_FATAL(nullptr,
"FATAL: map_handle cache failed to trim a map! If on Linux, you may have exceeded the "
"64k VMA process limit, set the LLFIO_DEBUG_LINUX_MUNMAP macro at the top of posix/map_handle.ipp to cause dumping of VMAs to "
@@ -238,16 +239,18 @@ bool map_handle::_recycle_map() noexcept
return false;
}
#else
+#ifdef __linux__
if(memory_accounting() == memory_accounting_kind::commit_charge)
+#endif
{
if(!do_mmap(_v, _addr, MAP_FIXED, nullptr, _pagesize, _length, 0, section_handle::flag::none | section_handle::flag::nocommit))
{
return false;
}
}
+#ifdef __linux__
else
{
-#ifdef __linux__
if(c.do_not_store_failed_count.load(std::memory_order_relaxed) < 10)
{
auto r = do_not_store({_addr, _length});
@@ -256,8 +259,8 @@ bool map_handle::_recycle_map() noexcept
c.do_not_store_failed_count.fetch_add(1, std::memory_order_relaxed);
}
}
-#endif
}
+#endif
#endif
return c.add(_reservation, _pagesize, _addr);
}
10 changes: 5 additions & 5 deletions include/llfio/v2.0/detail/impl/posix/utils.ipp
@@ -459,15 +459,15 @@ namespace utils
// std::cerr << "Anon entries:";
for(auto &i : anon_entries)
{
-OUTCOME_TRY(auto &&size, parse(i, "\nSize:"));
-OUTCOME_TRY(auto &&rss, parse(i, "\nRss:"));
-OUTCOME_TRY(auto &&anonymous, parse(i, "\nAnonymous:"));
-OUTCOME_TRY(auto &&lazyfree, parse(i, "\nLazyFree:"));
+OUTCOME_TRY(auto &&size, parse(i, "\nSize:"));  // amount committed
+OUTCOME_TRY(auto &&rss, parse(i, "\nRss:"));  // amount paged in
+OUTCOME_TRY(auto &&anonymous, parse(i, "\nAnonymous:"));  // amount actually dirtied
+OUTCOME_TRY(auto &&lazyfree, parse(i, "\nLazyFree:"));  // amount "decommitted" on Linux to avoid a VMA split
if(size != (uint64_t) -1 && rss != (uint64_t) -1 && anonymous != (uint64_t) -1)
{
ret.total_address_space_in_use += size;
ret.total_address_space_paged_in += rss;
-ret.private_committed += anonymous;
+ret.private_committed += size;
if(lazyfree != (uint64_t) -1)
{
ret.total_address_space_paged_in -= lazyfree;
6 changes: 4 additions & 2 deletions include/llfio/v2.0/utils.hpp
@@ -253,8 +253,10 @@ namespace utils
`process_memory_usage::want::private_committed_inaccurate` can yield significant
performance gains. If you set `process_memory_usage::want::private_committed_inaccurate`,
we use `/proc/pid/smaps_rollup` and `/proc/pid/maps` to calculate the results. This
-cannot distinguish between regions with the accounted
-flag enabled or disabled. By default, this fast path is enabled.
+cannot distinguish between regions with the accounted flag enabled or disabled, and
+be aware that glibc's `malloc()` for some inexplicable reason doesn't set the
+accounted flag on regions it commits, so the inaccurate flag will always yield
+higher numbers for private committed on Linux. By default, this fast path is enabled.
\note `/proc/pid/smaps_rollup` was added in Linux kernel 3.16, so the default specifying
`process_memory_usage::want::private_committed_inaccurate` will always fail on Linux
