diff --git a/include/llfio/revision.hpp b/include/llfio/revision.hpp index 8227a794d..9d544a029 100644 --- a/include/llfio/revision.hpp +++ b/include/llfio/revision.hpp @@ -1,4 +1,4 @@ // Note the second line of this file must ALWAYS be the git SHA, third line ALWAYS the git SHA update time -#define LLFIO_PREVIOUS_COMMIT_REF d3ff87fd8b91f06d4f7bd240860ef68f15a03621 -#define LLFIO_PREVIOUS_COMMIT_DATE "2021-08-20 20:17:59 +00:00" -#define LLFIO_PREVIOUS_COMMIT_UNIQUE d3ff87fd +#define LLFIO_PREVIOUS_COMMIT_REF 46f0760e7ed2b80c46acd91bacc130049620bee6 +#define LLFIO_PREVIOUS_COMMIT_DATE "2021-08-21 13:31:38 +00:00" +#define LLFIO_PREVIOUS_COMMIT_UNIQUE 46f0760e diff --git a/include/llfio/v2.0/detail/impl/map_handle.ipp b/include/llfio/v2.0/detail/impl/map_handle.ipp index f15de0806..e5e9fa94c 100644 --- a/include/llfio/v2.0/detail/impl/map_handle.ipp +++ b/include/llfio/v2.0/detail/impl/map_handle.ipp @@ -57,7 +57,7 @@ namespace detail size_t trie_count{0}; map_handle_cache_item_t *trie_children[8 * sizeof(size_t)]; bool trie_nobbledir{0}; - size_t bytes_in_cache{0}; + size_t bytes_in_cache{0}, hits{0}, misses{0}; }; static const size_t page_size_shift = [] { return QUICKCPPLIB_NAMESPACE::algorithm::bitwise_trie::detail::bitscanr(utils::page_size()); }(); class map_handle_cache_t : protected QUICKCPPLIB_NAMESPACE::algorithm::bitwise_trie::bitwise_trie @@ -66,7 +66,11 @@ namespace detail using _lock_guard = std::unique_lock; public: +#ifdef __linux__ std::atomic do_not_store_failed_count{0}; +#endif + + ~map_handle_cache_t() { trim_cache(std::chrono::steady_clock::now(), (size_t)-1); } using _base::size; void *get(size_t bytes, size_t page_size) @@ -79,8 +83,10 @@ namespace detail } if(it == _base::end() || page_size != it->page_size || _bytes != it->trie_key) { + misses++; return nullptr; } + hits++; auto *p = *it; _base::erase(it); _base::bytes_in_cache -= bytes; @@ -98,18 +104,20 @@ namespace detail _base::insert(p); _base::bytes_in_cache += bytes; } - 
map_handle::cache_statistics trim_cache(std::chrono::steady_clock::time_point older_than) + map_handle::cache_statistics trim_cache(std::chrono::steady_clock::time_point older_than, size_t max_items) { _lock_guard g(lock); map_handle::cache_statistics ret; - if(older_than != std::chrono::steady_clock::time_point()) + + if(older_than != std::chrono::steady_clock::time_point() && max_items > 0) { - for(auto it = _base::begin(); it != _base::end();) + // Prefer bigger items to trim than smaller ones + for(auto it = --_base::end(); it != _base::end() && max_items > 0;) { if(it->when_added <= older_than) { auto *p = *it; - it = _base::erase(it); + _base::erase(it--); const auto _bytes = p->trie_key << page_size_shift; #ifdef _WIN32 if(!win32_release_nonfile_allocations((byte *) p->addr, _bytes, MEM_RELEASE)) @@ -117,24 +125,28 @@ namespace detail if(-1 == ::munmap(p->addr, _bytes)) #endif { - LLFIO_LOG_FATAL(nullptr, "map_handle cache failed to trim a map! If on Linux, you may have exceeded the " - "64k VMA process limit, set the LLFIO_DEBUG_LINUX_MUNMAP macro at the top of posix/map_handle.ipp to cause dumping of VMAs to " - "/tmp/llfio_unmap_debug_smaps.txt, and combine with strace to figure it out."); + LLFIO_LOG_FATAL(nullptr, + "map_handle cache failed to trim a map! 
If on Linux, you may have exceeded the " + "64k VMA process limit, set the LLFIO_DEBUG_LINUX_MUNMAP macro at the top of posix/map_handle.ipp to cause dumping of VMAs to " + "/tmp/llfio_unmap_debug_smaps.txt, and combine with strace to figure it out."); abort(); } _base::bytes_in_cache -= _bytes; ret.bytes_just_trimmed += _bytes; ret.items_just_trimmed++; + max_items--; delete p; } else { - ++it; + --it; } } } ret.items_in_cache = _base::size(); ret.bytes_in_cache = _base::bytes_in_cache; + ret.hits = _base::hits; + ret.misses = _base::misses; return ret; } }; @@ -249,9 +261,9 @@ bool map_handle::_recycle_map() noexcept } } -map_handle::cache_statistics map_handle::trim_cache(std::chrono::steady_clock::time_point older_than) noexcept +map_handle::cache_statistics map_handle::trim_cache(std::chrono::steady_clock::time_point older_than, size_t max_items) noexcept { - return detail::map_handle_cache().trim_cache(older_than); + return detail::map_handle_cache().trim_cache(older_than, max_items); } diff --git a/include/llfio/v2.0/map_handle.hpp b/include/llfio/v2.0/map_handle.hpp index 8f3b39d75..01bee2ffe 100644 --- a/include/llfio/v2.0/map_handle.hpp +++ b/include/llfio/v2.0/map_handle.hpp @@ -345,6 +345,9 @@ guaranteed that writing into it will not fail. Note that memory mapped files hav their file contents, so except for pages written into and not yet flushed to storage, memory mapped files usually do not contribute more than a few pages each to commit charge. +\note You can determine the virtual memory accounting model for your system using `map_handle::memory_accounting()`. +This caches the result of interrogating the system, so it is fast after its first call. + The system commit limit can be easily exceeded if programs commit a lot of memory that they never use. To avoid this, for large allocations you should *reserve* pages which you don't expect to use immediately, and *later* explicitly commit and decommit them. 
You can request pages not accounted against the system @@ -360,6 +363,40 @@ modified pages. This makes sense, given the prevalence of code which commits mem however it also leads to anti-social outcomes such as Linux distributions enabling pathological workarounds such as over commit and specialised OOM killers. +## Map handle caching + +Repeatedly freeing and allocating virtual memory is particularly expensive because page contents must +be cleared by the system before they can be handed out again. Most kernels clear pages using an idle +loop, but if the system is busy then a surprising amount of CPU time can get consumed wiping pages. + +Most users of page-allocated memory can tolerate receiving dirty pages, so `map_handle` implements +a process-local cache of previously allocated page regions which have since been `close()`d. If a +new `map_handle::map()` asks for virtual memory and there is a region in the cache, that region is +returned instead of a new region. + +Before a region is added to the cache, it is decommitted (except on Linux when overcommit is enabled, +see below). It therefore only consumes virtual address space in your process, and does not otherwise +consume any resources apart from a VMA entry in the kernel. In particular, it does not appear in +your process' RAM consumption (except on Linux). When a region is removed from the cache, +it is committed, thus adding it to your process' RAM consumption. During this decommit-recommit +process, the kernel **may** choose to scavenge the memory, in which case fresh pages will be restored. +However, there is a good chance that whatever the pages contained before decommit will still be there +after recommit. + +Linux has a famously messed up virtual memory implementation. LLFIO implements a strict memory +accounting model, and ordinarily we tell Linux which pages are to be counted towards commit charge +and which are not, so you don't have to. 
If overcommit is disabled in the system, you then get the same strict +memory accounting as on every other OS. + +If, however, overcommit is enabled, we don't decommit pages, but rather mark them `LazyFree`. This is +to avoid inhibiting VMA coalescing, which is super important on Linux because of its ridiculously +low per-process VMA limit (typically 64k regions on most installs). Therefore, if you do disable +overcommit, you will also need to substantially raise the maximum per-process VMA limit, as LLFIO +will now strictly decommit memory, which prevents VMA coalescing and thus generates lots more VMAs. + +The process-local map handle cache does not trim itself over time, so if you wish to reclaim virtual +address space you need to manually call `map_handle::trim_cache()` from time to time. + ## Barriers: `map_handle`, because it implements `io_handle`, implements `barrier()` in a very conservative way @@ -665,10 +702,12 @@ class LLFIO_DECL map_handle : public lockable_io_handle size_t bytes_in_cache{0}; size_t items_just_trimmed{0}; size_t bytes_just_trimmed{0}; + size_t hits{0}, misses{0}; }; /*! Get statistics about the map handle cache, optionally trimming the least recently used maps. */ - static LLFIO_HEADERS_ONLY_MEMFUNC_SPEC cache_statistics trim_cache(std::chrono::steady_clock::time_point older_than = {}) noexcept; + static LLFIO_HEADERS_ONLY_MEMFUNC_SPEC cache_statistics trim_cache(std::chrono::steady_clock::time_point older_than = {}, + size_t max_items = (size_t) -1) noexcept; //! The memory section this handle is using section_handle *section() const noexcept { return _section; } diff --git a/test/tests/map_handle_cache.cpp b/test/tests/map_handle_cache.cpp index 1756273b5..4e25c27fe 100644 --- a/test/tests/map_handle_cache.cpp +++ b/test/tests/map_handle_cache.cpp @@ -27,18 +27,20 @@ Distributed under the Boost Software License, Version 1.0. 
#include #include +inline QUICKCPPLIB_NOINLINE void fault(LLFIO_V2_NAMESPACE::map_handle &mh) +{ + for(auto *p = (volatile char *) mh.address(); p < (volatile char *) mh.address() + mh.length(); p += mh.page_size()) + { + *p = 1; + } +}; + static inline void TestMapHandleCache() { static constexpr size_t ITEMS_COUNT = 10000; namespace llfio = LLFIO_V2_NAMESPACE; - bool free_cache_immediately = false; + bool free_cache_immediately = true; auto test = [&] { - auto fault = [](llfio::map_handle &mh) { - for(auto *p = (volatile char *) mh.address(); p < (volatile char *) mh.address() + mh.length(); p += mh.page_size()) - { - *p = 1; - } - }; QUICKCPPLIB_NAMESPACE::algorithm::small_prng::small_prng rand; std::vector maps; for(size_t n = 0; n < ITEMS_COUNT; n++) @@ -59,7 +61,8 @@ static inline void TestMapHandleCache() BOOST_REQUIRE(stats.items_in_cache == 0); } auto begin = std::chrono::steady_clock::now(); - for(size_t n = 0; n < ITEMS_COUNT * 10; n++) + size_t ops = 0; + for(size_t n = 0; n < ITEMS_COUNT * 100; n++) { auto v = rand(); auto toallocate = (v >> 2) & (128 * 1024 - 1); @@ -70,10 +73,12 @@ static inline void TestMapHandleCache() if(v & 1) { maps[n % ITEMS_COUNT].close().value(); + ops++; } else { fault((maps[n % ITEMS_COUNT] = llfio::map_handle::map(toallocate, false).value())); + ops += 2; } if(free_cache_immediately) { @@ -85,8 +90,11 @@ static inline void TestMapHandleCache() auto end = std::chrono::steady_clock::now(); { auto stats = llfio::map_handle::trim_cache(); - std::cout << "\nIn the map_handle cache after churn there are " << stats.bytes_in_cache << " bytes in the cache in " << stats.items_in_cache << " items." - << std::endl; + auto usage = llfio::utils::current_process_memory_usage().value(); + std::cout << "\n\nIn the map_handle cache after churn there are " << (stats.bytes_in_cache / 1024.0 / 1024.0) << " Mb in the cache in " + << stats.items_in_cache << " items. There were " << stats.hits << " hits and " << stats.misses + << " misses. 
Process virtual address space used is " << (usage.total_address_space_in_use / 1024.0 / 1024.0 / 1024.0) + << " Gb and commit charge is " << (usage.private_committed / 1024.0 / 1024.0) << " Mb." << std::endl; } for(auto &i : maps) { @@ -94,14 +102,17 @@ static inline void TestMapHandleCache() } { auto stats = llfio::map_handle::trim_cache(); - std::cout << "\nIn the map_handle cache after releasing everything there are " << stats.bytes_in_cache << " bytes in the cache in " - << stats.items_in_cache << " items." << std::endl; + auto usage = llfio::utils::current_process_memory_usage().value(); + std::cout << "\nIn the map_handle cache after releasing everything there are " << (stats.bytes_in_cache / 1024.0 / 1024.0) << " Mb in the cache in " + << stats.items_in_cache << " items. Process virtual address space used is " << (usage.total_address_space_in_use / 1024.0 / 1024.0 / 1024.0) + << " Gb and commit charge is " << (usage.private_committed / 1024.0 / 1024.0) << " Mb." << std::endl; } - std::cout << "With free_cache_immediately = " << free_cache_immediately << " it took " - << (std::chrono::duration_cast(end - begin).count() / 1000.0 / ITEMS_COUNT) << " us per allocation-free." << std::endl; + std::cout << "\nWith free_cache_immediately = " << free_cache_immediately << " it took " + << (std::chrono::duration_cast(end - begin).count() / 1000.0 / ops) << " us per allocation-free." 
+ << std::endl; }; test(); - free_cache_immediately = true; + free_cache_immediately = false; test(); } diff --git a/test/tests/utils.cpp b/test/tests/utils.cpp index 077122298..82967e871 100644 --- a/test/tests/utils.cpp +++ b/test/tests/utils.cpp @@ -172,14 +172,20 @@ static inline void TestCurrentProcessMemoryUsage() BOOST_CHECK(within(before_anything, after_fault, 1024, 1024, 1024, 1024)); BOOST_CHECK(within(before_anything, after_decommit, 1024, 0, 0, 0)); #ifdef _WIN32 - BOOST_CHECK(within(before_anything, after_zero, 1024, 0, 1024, 0)); // may not evict faulted set on POSIX + BOOST_CHECK(within(before_anything, after_zero, 1024, 0, 1024, 0)); + BOOST_CHECK(within(before_anything, after_do_not_store, 1024, 0, 1024, 0)); // do_not_store() decreases RSS but not commit on Windows #else - (void) after_zero; + (void) after_zero; // may not evict faulted set on POSIX + BOOST_CHECK(within(before_anything, after_do_not_store, 1024, 1024, 0, 1024)); // do_not_store() decreases commit but does not RSS on POSIX #endif - BOOST_CHECK(within(before_anything, after_do_not_store, 1024, 0, 1024, 0)); #endif } std::cout << "\nFor file mapping:\n"; + { + auto stats = llfio::map_handle::trim_cache(std::chrono::steady_clock::now()); + BOOST_REQUIRE(stats.bytes_in_cache == 0); + BOOST_REQUIRE(stats.items_in_cache == 0); + } { auto sectionh = llfio::section_handle::section(1024 * 1024 * 1024).value(); llfio::utils::process_memory_usage before_anything, after_reserve, after_commit, after_fault, after_decommit, after_zero, after_do_not_store;