From d5bf3c3c8b770d940eae508e01b9e733f025c291 Mon Sep 17 00:00:00 2001 From: Git User Date: Tue, 24 Mar 2026 23:39:35 +0530 Subject: [PATCH 01/12] performance: Optimize shared_mutex with lock-free fast paths and structural state management --- .../hpx/synchronization/shared_mutex.hpp | 25 +++++++ tests/performance/local/CMakeLists.txt | 2 + .../local/shared_mutex_overhead.cpp | 75 +++++++++++++++++++ 3 files changed, 102 insertions(+) create mode 100644 tests/performance/local/shared_mutex_overhead.cpp diff --git a/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp b/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp index f62b5b445e6e..395a6c960261 100644 --- a/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp +++ b/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp @@ -140,6 +140,27 @@ namespace hpx::detail { return true; } + bool try_unlock_shared_fast() + { + while (true) + { + auto s = state.load(std::memory_order_acquire); + if (s.data.exclusive || s.data.exclusive_waiting_blocked || + s.data.upgrade || s.data.shared_count <= 1) + { + return false; + } + + auto s1 = s; + --s.data.shared_count; + if (set_state(s1, s)) + { + return true; + } + s = s1; + } + } + void unlock_shared() { while (true) @@ -510,6 +531,8 @@ namespace hpx::detail { void lock_shared() { auto data = data_; + if (data->try_lock_shared()) + return; data->lock_shared(); } @@ -522,6 +545,8 @@ namespace hpx::detail { void unlock_shared() { auto data = data_; + if (data->try_unlock_shared_fast()) + return; data->unlock_shared(); } diff --git a/tests/performance/local/CMakeLists.txt b/tests/performance/local/CMakeLists.txt index a920c07421d6..f5ac538e28a1 100644 --- a/tests/performance/local/CMakeLists.txt +++ b/tests/performance/local/CMakeLists.txt @@ -25,6 +25,7 @@ set(benchmarks skynet wait_all_timings benchmark_stealing + shared_mutex_overhead ) set(timed_task_spawn_SOURCES activate_counters.cpp) @@ -144,6 +145,7 @@ set(print_heterogeneous_payloads_PARAMETERS NO_HPX_MAIN) set(skynet_PARAMETERS NO_HPX_MAIN) set(timed_task_spawn_PARAMETERS NO_HPX_MAIN) set(benchmark_stealing_PARAMETERS NO_HPX_MAIN) +set(shared_mutex_overhead_PARAMETERS NO_HPX_MAIN) set(hpx_tls_overhead_PARAMETERS NO_HPX_MAIN) set(native_tls_overhead_PARAMETERS NO_HPX_MAIN) set(coroutines_call_overhead_PARAMETERS NO_HPX_MAIN) diff --git a/tests/performance/local/shared_mutex_overhead.cpp b/tests/performance/local/shared_mutex_overhead.cpp new file mode 100644 index 000000000000..0a751fc9aae9 --- /dev/null +++ b/tests/performance/local/shared_mutex_overhead.cpp @@ -0,0 +1,75 @@ +// (C) Copyright 2024 Arpit Khandelwal +// +// SPDX-License-Identifier: BSL-1.0 +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +std::uint64_t num_iterations = 100000; +std::uint64_t reader_threads = 4; + +hpx::shared_mutex mtx; + +void reader() +{ + for (std::uint64_t i = 0; i < num_iterations; ++i) + { + std::shared_lock l(mtx); + } +} + +int hpx_main(hpx::program_options::variables_map& vm) +{ + num_iterations = vm["iterations"].as(); + reader_threads = hpx::get_num_worker_threads(); + + std::cout << "Starting benchmark with " << reader_threads << " threads..." + << std::endl; + + std::vector> futures; + futures.reserve(reader_threads); + + hpx::chrono::high_resolution_timer walltime; + + for (std::uint64_t i = 0; i < reader_threads; ++i) + { + futures.push_back(hpx::async(&reader)); + } + + hpx::wait_all(futures); + + double const duration = walltime.elapsed(); + + std::cout << "Total time: " << duration << " seconds" << std::endl; + std::cout << "Average time per reader thread: " << duration / reader_threads + << " seconds" << std::endl; + + hpx::util::print_cdash_timing("SharedMutexOverhead", duration); + + return hpx::local::finalize(); +} + +int main(int argc, char* argv[]) +{ + hpx::program_options::options_description cmdline( + "usage: " HPX_APPLICATION_STRING " [options]"); + + cmdline.add_options()("iterations", + hpx::program_options::value()->default_value(100000), + "number of iterations per thread"); + + hpx::local::init_params init_args; + init_args.desc_cmdline = cmdline; + + return hpx::local::init(hpx_main, argc, argv, init_args); +} From 6acdd90a6397e97ae4fec5af1b5320b2c6facf3c Mon Sep 17 00:00:00 2001 From: arpittkhandelwal Date: Thu, 16 Apr 2026 07:58:01 +0530 Subject: [PATCH 02/12] performance: Address maintainer feedback on shared_mutex spin loops and update copyright --- .../hpx/synchronization/shared_mutex.hpp | 33 ++++++++++++------- .../local/shared_mutex_overhead.cpp | 2 +- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp b/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp index 395a6c960261..914d2dcad1b6 100644 --- a/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp +++ b/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp @@ -121,9 +121,9 @@ namespace hpx::detail { bool try_lock_shared() { + auto s = state.load(std::memory_order_acquire); while (true) { - auto s = state.load(std::memory_order_acquire); if (s.data.exclusive || s.data.exclusive_waiting_blocked) { return false; @@ -136,6 +136,7 @@ namespace hpx::detail { { break; } + s = state.load(std::memory_order_acquire); } return true; } @@ -163,9 +164,9 @@ namespace hpx::detail { void unlock_shared() { + auto s = state.load(std::memory_order_acquire); while (true) { - auto s = state.load(std::memory_order_acquire); auto s1 = s; if (--s.data.shared_count == 0) @@ -205,14 +206,15 @@ namespace hpx::detail { { break; } + s = state.load(std::memory_order_acquire); } } void lock() { + auto s = state.load(std::memory_order_acquire); while (true) { - auto s = state.load(std::memory_order_acquire); while (s.data.shared_count != 0 || s.data.exclusive) { auto s1 = s; @@ -235,14 +237,15 @@ namespace hpx::detail { { break; } + s = state.load(std::memory_order_acquire); } } bool try_lock() { + auto s = state.load(std::memory_order_acquire); while (true) { - auto s = state.load(std::memory_order_acquire); if (s.data.shared_count || s.data.exclusive) { return false; @@ -255,15 +258,16 @@ namespace hpx::detail { { break; } + s = state.load(std::memory_order_acquire); } return true; } void unlock() { + auto s = state.load(std::memory_order_acquire); while (true) { - auto s = state.load(std::memory_order_acquire); auto s1 = s; s.data.exclusive = false; @@ -276,6 +280,7 @@ namespace hpx::detail { release_waiters(lk); break; } + s = state.load(std::memory_order_acquire); } } @@ -308,9 +313,9 @@ namespace hpx::detail { bool try_lock_upgrade() { + auto s = state.load(std::memory_order_acquire); while (true) { - auto s = state.load(std::memory_order_acquire); if (s.data.exclusive || s.data.exclusive_waiting_blocked || s.data.upgrade) { @@ -325,15 +330,16 @@ namespace hpx::detail { { break; } + s = state.load(std::memory_order_acquire); } return true; } void unlock_upgrade() { + auto s = state.load(std::memory_order_acquire); while (true) { - auto s = state.load(std::memory_order_acquire); auto s1 = s; bool release = false; @@ -358,6 +364,7 @@ namespace hpx::detail { { break; } + s = state.load(std::memory_order_acquire); } } @@ -405,9 +412,9 @@ namespace hpx::detail { void unlock_and_lock_upgrade() { + auto s = state.load(std::memory_order_acquire); while (true) { - auto s = state.load(std::memory_order_acquire); auto s1 = s; s.data.exclusive = false; @@ -422,14 +429,15 @@ namespace hpx::detail { release_waiters(lk); break; } + s = state.load(std::memory_order_acquire); } } void unlock_and_lock_shared() { + auto s = state.load(std::memory_order_acquire); while (true) { - auto s = state.load(std::memory_order_acquire); auto s1 = s; s.data.exclusive = false; @@ -443,14 +451,15 @@ namespace hpx::detail { release_waiters(lk); break; } + s = state.load(std::memory_order_acquire); } } bool try_unlock_shared_and_lock() { + auto s = state.load(std::memory_order_acquire); while (true) { - auto s = state.load(std::memory_order_acquire); if (s.data.exclusive || s.data.exclusive_waiting_blocked || s.data.upgrade || s.data.shared_count != 1) { @@ -465,15 +474,16 @@ namespace hpx::detail { { break; } + s = state.load(std::memory_order_acquire); } return true; } void unlock_upgrade_and_lock_shared() { + auto s = state.load(std::memory_order_acquire); while (true) { - auto s = state.load(std::memory_order_acquire); auto s1 = s; s.data.exclusive_waiting_blocked = false; @@ -486,6 +496,7 @@ namespace hpx::detail { release_waiters(lk); break; } + s = state.load(std::memory_order_acquire); } } diff --git a/tests/performance/local/shared_mutex_overhead.cpp b/tests/performance/local/shared_mutex_overhead.cpp index 0a751fc9aae9..cb06faa497c7 100644 --- a/tests/performance/local/shared_mutex_overhead.cpp +++ b/tests/performance/local/shared_mutex_overhead.cpp @@ -1,4 +1,4 @@ -// (C) Copyright 2024 Arpit Khandelwal +// (C) Copyright 2026 Arpit Khandelwal // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. (See accompanying From 47de6f45ad9f68c8810fb3d110f69845128ba816 Mon Sep 17 00:00:00 2001 From: arpittkhandelwal Date: Fri, 17 Apr 2026 11:44:22 +0530 Subject: [PATCH 03/12] performance: Refine set_state short-circuit and reuse state from failed CAS --- .../hpx/synchronization/shared_mutex.hpp | 30 ++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp b/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp index 914d2dcad1b6..299310be4dd7 100644 --- a/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp +++ b/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp @@ -76,16 +76,26 @@ namespace hpx::detail { bool set_state(shared_state& s1, shared_state& s) noexcept { + auto current_state = state.load(std::memory_order_relaxed); + if (s1.value != current_state.value) + { + s1 = current_state; + return false; + } + ++s.data.tag; - return s1.value == state.load(std::memory_order_relaxed).value && - state.compare_exchange_strong(s1, s, std::memory_order_release); + return state.compare_exchange_strong(s1, s, std::memory_order_release); } bool set_state(shared_state& s1, shared_state& s, std::unique_lock& lk) noexcept { - if (s1.value != state.load(std::memory_order_relaxed).value) + auto current_state = state.load(std::memory_order_relaxed); + if (s1.value != current_state.value) + { + s1 = current_state; return false; + } ++s.data.tag; @@ -136,7 +146,7 @@ namespace hpx::detail { { break; } - s = state.load(std::memory_order_acquire); + s = s1; } return true; } @@ -206,7 +216,7 @@ namespace hpx::detail { { break; } - s = state.load(std::memory_order_acquire); + s = s1; } } @@ -237,7 +247,7 @@ namespace hpx::detail { { break; } - s = state.load(std::memory_order_acquire); + s = s1; } } @@ -258,7 +268,7 @@ namespace hpx::detail { { break; } - s = state.load(std::memory_order_acquire); + s = s1; } return true; } @@ -330,7 +340,7 @@ namespace hpx::detail { { break; } - s = state.load(std::memory_order_acquire); + s = s1; } return true; } @@ -364,7 +374,7 @@ namespace hpx::detail { { break; } - s = state.load(std::memory_order_acquire); + s = s1; } } @@ -474,7 +484,7 @@ namespace hpx::detail { { break; } - s = state.load(std::memory_order_acquire); + s = s1; } return true; } From a371a39a99e8b289470a46e7264ce6574c073b0e Mon Sep 17 00:00:00 2001 From: arpittkhandelwal Date: Sat, 18 Apr 2026 11:44:25 +0530 Subject: [PATCH 04/12] performance: Replace all redundant state.load with s=s1 in spin loops --- .../include/hpx/synchronization/shared_mutex.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp b/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp index 299310be4dd7..a16be3bf865e 100644 --- a/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp +++ b/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp @@ -153,9 +153,9 @@ namespace hpx::detail { bool try_unlock_shared_fast() { + auto s = state.load(std::memory_order_acquire); while (true) { - auto s = state.load(std::memory_order_acquire); if (s.data.exclusive || s.data.exclusive_waiting_blocked || s.data.upgrade || s.data.shared_count <= 1) { @@ -290,7 +290,7 @@ namespace hpx::detail { release_waiters(lk); break; } - s = state.load(std::memory_order_acquire); + s = s1; } } @@ -439,7 +439,7 @@ namespace hpx::detail { release_waiters(lk); break; } - s = state.load(std::memory_order_acquire); + s = s1; } } @@ -461,7 +461,7 @@ namespace hpx::detail { release_waiters(lk); break; } - s = state.load(std::memory_order_acquire); + s = s1; } } @@ -506,7 +506,7 @@ namespace hpx::detail { release_waiters(lk); break; } - s = state.load(std::memory_order_acquire); + s = s1; } } From f00d4da6fe7e618f53a96dd2a7eb4cb06b115557 Mon Sep 17 00:00:00 2001 From: arpittkhandelwal Date: Sat, 18 Apr 2026 11:54:03 +0530 Subject: [PATCH 05/12] performance: Fix clang-format line wrap in set_state --- .../include/hpx/synchronization/shared_mutex.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp b/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp index a16be3bf865e..da2d128c3cf6 100644 --- a/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp +++ b/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp @@ -84,7 +84,8 @@ namespace hpx::detail { } ++s.data.tag; - return state.compare_exchange_strong(s1, s, std::memory_order_release); + return state.compare_exchange_strong( + s1, s, std::memory_order_release); } bool set_state(shared_state& s1, shared_state& s, From 13ad2b2e63c240eae180e21733c90bbc6b432524 Mon Sep 17 00:00:00 2001 From: arpittkhandelwal Date: Sun, 19 Apr 2026 20:37:34 +0530 Subject: [PATCH 06/12] performance: Eliminate redundant atomic refcounts in shared_mutex wrapper --- .../hpx/synchronization/shared_mutex.hpp | 65 ++++++------------- 1 file changed, 19 insertions(+), 46 deletions(-) diff --git a/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp b/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp index da2d128c3cf6..ff9d5a6ad16b 100644 --- a/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp +++ b/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp @@ -76,32 +76,19 @@ namespace hpx::detail { bool set_state(shared_state& s1, shared_state& s) noexcept { - auto current_state = state.load(std::memory_order_relaxed); - if (s1.value != current_state.value) - { - s1 = current_state; - return false; - } - ++s.data.tag; return state.compare_exchange_strong( - s1, s, std::memory_order_release); + s1, s, std::memory_order_release, std::memory_order_relaxed); } bool set_state(shared_state& s1, shared_state& s, std::unique_lock& lk) noexcept { - auto current_state = state.load(std::memory_order_relaxed); - if (s1.value != current_state.value) - { - s1 = current_state; - return false; - } - ++s.data.tag; lk = std::unique_lock(state_change); - if (state.compare_exchange_strong(s1, s, std::memory_order_release)) + if (state.compare_exchange_strong( + s1, s, std::memory_order_release, std::memory_order_relaxed)) return true; lk.unlock(); @@ -552,90 +539,76 @@ namespace hpx::detail { void lock_shared() { - auto data = data_; - if (data->try_lock_shared()) + if (data_->try_lock_shared()) return; - data->lock_shared(); + data_->lock_shared(); } bool try_lock_shared() { - auto data = data_; - return data->try_lock_shared(); + return data_->try_lock_shared(); } void unlock_shared() { - auto data = data_; - if (data->try_unlock_shared_fast()) + if (data_->try_unlock_shared_fast()) return; - data->unlock_shared(); + data_->unlock_shared(); } void lock() { - auto data = data_; - data->lock(); + data_->lock(); } bool try_lock() { - auto data = data_; - return data->try_lock(); + return data_->try_lock(); } void unlock() { - auto data = data_; - data->unlock(); + data_->unlock(); } void lock_upgrade() { - auto data = data_; - data->lock_upgrade(); + data_->lock_upgrade(); } bool try_lock_upgrade() { - auto data = data_; - return data->try_lock_upgrade(); + return data_->try_lock_upgrade(); } void unlock_upgrade() { - auto data = data_; - data->unlock_upgrade(); + data_->unlock_upgrade(); } void unlock_upgrade_and_lock() { - auto data = data_; - data->unlock_upgrade_and_lock(); + data_->unlock_upgrade_and_lock(); } void unlock_and_lock_upgrade() { - auto data = data_; - data->unlock_and_lock_upgrade(); + data_->unlock_and_lock_upgrade(); } void unlock_and_lock_shared() { - auto data = data_; - data->unlock_and_lock_shared(); + data_->unlock_and_lock_shared(); } bool try_unlock_shared_and_lock() { - auto data = data_; - return data->try_unlock_shared_and_lock(); + return data_->try_unlock_shared_and_lock(); } void unlock_upgrade_and_lock_shared() { - auto data = data_; - data->unlock_upgrade_and_lock_shared(); + data_->unlock_upgrade_and_lock_shared(); } }; } // namespace hpx::detail From 5c3a426c44fb7b08136e22e16f6fd2c4976fb028 Mon Sep 17 00:00:00 2001 From: arpittkhandelwal Date: Sun, 19 Apr 2026 20:40:56 +0530 Subject: [PATCH 07/12] style: Fix clang-format line wrapping --- .../include/hpx/synchronization/shared_mutex.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp b/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp index ff9d5a6ad16b..0b649accf4bf 100644 --- a/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp +++ b/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp @@ -77,8 +77,8 @@ namespace hpx::detail { bool set_state(shared_state& s1, shared_state& s) noexcept { ++s.data.tag; - return state.compare_exchange_strong( - s1, s, std::memory_order_release, std::memory_order_relaxed); + return state.compare_exchange_strong(s1, s, std::memory_order_release, + std::memory_order_relaxed); } bool set_state(shared_state& s1, shared_state& s, @@ -87,8 +87,8 @@ namespace hpx::detail { ++s.data.tag; lk = std::unique_lock(state_change); - if (state.compare_exchange_strong( - s1, s, std::memory_order_release, std::memory_order_relaxed)) + if (state.compare_exchange_strong(s1, s, std::memory_order_release, + std::memory_order_relaxed)) return true; lk.unlock(); From 38d7f22141c222462db907f95f4eb44d28a12c26 Mon Sep 17 00:00:00 2001 From: arpittkhandelwal Date: Sun, 19 Apr 2026 22:07:13 +0530 Subject: [PATCH 08/12] performance: Final optimized shared_mutex with lifetime safety refcounts --- .../hpx/synchronization/shared_mutex.hpp | 46 ++++++++++++------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp b/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp index 0b649accf4bf..527d8bf9ccbd 100644 --- a/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp +++ b/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp @@ -539,76 +539,90 @@ namespace hpx::detail { void lock_shared() { - if (data_->try_lock_shared()) + auto data = data_; + if (data->try_lock_shared()) return; - data_->lock_shared(); + data->lock_shared(); } bool try_lock_shared() { - return data_->try_lock_shared(); + auto data = data_; + return data->try_lock_shared(); } void unlock_shared() { - if (data_->try_unlock_shared_fast()) + auto data = data_; + if (data->try_unlock_shared_fast()) return; - data_->unlock_shared(); + data->unlock_shared(); } void lock() { - data_->lock(); + auto data = data_; + data->lock(); } bool try_lock() { - return data_->try_lock(); + auto data = data_; + return data->try_lock(); } void unlock() { - data_->unlock(); + auto data = data_; + data->unlock(); } void lock_upgrade() { - data_->lock_upgrade(); + auto data = data_; + data->lock_upgrade(); } bool try_lock_upgrade() { - return data_->try_lock_upgrade(); + auto data = data_; + return data->try_lock_upgrade(); } void unlock_upgrade() { - data_->unlock_upgrade(); + auto data = data_; + data->unlock_upgrade(); } void unlock_upgrade_and_lock() { - data_->unlock_upgrade_and_lock(); + auto data = data_; + data->unlock_upgrade_and_lock(); } void unlock_and_lock_upgrade() { - data_->unlock_and_lock_upgrade(); + auto data = data_; + data->unlock_and_lock_upgrade(); } void unlock_and_lock_shared() { - data_->unlock_and_lock_shared(); + auto data = data_; + data->unlock_and_lock_shared(); } bool try_unlock_shared_and_lock() { - return data_->try_unlock_shared_and_lock(); + auto data = data_; + return data->try_unlock_shared_and_lock(); } void unlock_upgrade_and_lock_shared() { - data_->unlock_upgrade_and_lock_shared(); + auto data = data_; + data->unlock_upgrade_and_lock_shared(); } }; } // namespace hpx::detail From e7605f49480d633b1110b5efa71487f3d9d5cd11 Mon Sep 17 00:00:00 2001 From: arpittkhandelwal Date: Sun, 19 Apr 2026 22:09:53 +0530 Subject: [PATCH 09/12] style: Match clang-format requirements for compare_exchange_strong --- .../include/hpx/synchronization/shared_mutex.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp b/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp index 527d8bf9ccbd..6e40d78a3012 100644 --- a/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp +++ b/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp @@ -77,8 +77,8 @@ namespace hpx::detail { bool set_state(shared_state& s1, shared_state& s) noexcept { ++s.data.tag; - return state.compare_exchange_strong(s1, s, std::memory_order_release, - std::memory_order_relaxed); + return state.compare_exchange_strong( + s1, s, std::memory_order_release, std::memory_order_relaxed); } bool set_state(shared_state& s1, shared_state& s, @@ -87,8 +87,8 @@ namespace hpx::detail { ++s.data.tag; lk = std::unique_lock(state_change); - if (state.compare_exchange_strong(s1, s, std::memory_order_release, - std::memory_order_relaxed)) + if (state.compare_exchange_strong( + s1, s, std::memory_order_release, std::memory_order_relaxed)) return true; lk.unlock(); From 1ed78e7d55b7fcb52b477a19a45fb1b0fbb25e47 Mon Sep 17 00:00:00 2001 From: arpittkhandelwal Date: Thu, 30 Apr 2026 00:29:40 +0530 Subject: [PATCH 10/12] style: fix clang-format issues in shared_mutex.hpp --- .../include/hpx/synchronization/shared_mutex.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp b/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp index 6e40d78a3012..fc9bb317ea87 100644 --- a/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp +++ b/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp @@ -87,8 +87,8 @@ namespace hpx::detail { ++s.data.tag; lk = std::unique_lock(state_change); - if (state.compare_exchange_strong( - s1, s, std::memory_order_release, std::memory_order_relaxed)) + if (state.compare_exchange_strong(s1, s, std::memory_order_release, + std::memory_order_relaxed)) return true; lk.unlock(); From 81041ec491c2a499c65b1527e9136841b1150036 Mon Sep 17 00:00:00 2001 From: arpittkhandelwal Date: Wed, 6 May 2026 08:37:31 +0530 Subject: [PATCH 11/12] performance: Simplify unlock_shared and add missing include to benchmark --- .../hpx/synchronization/shared_mutex.hpp | 22 ------------------- .../local/shared_mutex_overhead.cpp | 1 + 2 files changed, 1 insertion(+), 22 deletions(-) diff --git a/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp b/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp index fc9bb317ea87..b0c78c862937 100644 --- a/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp +++ b/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp @@ -139,26 +139,6 @@ namespace hpx::detail { return true; } - bool try_unlock_shared_fast() - { - auto s = state.load(std::memory_order_acquire); - while (true) - { - if (s.data.exclusive || s.data.exclusive_waiting_blocked || - s.data.upgrade || s.data.shared_count <= 1) - { - return false; - } - - auto s1 = s; - --s.data.shared_count; - if (set_state(s1, s)) - { - return true; - } - s = s1; - } - } void unlock_shared() { @@ -554,8 +534,6 @@ namespace hpx::detail { void unlock_shared() { auto data = data_; - if (data->try_unlock_shared_fast()) - return; data->unlock_shared(); } diff --git a/tests/performance/local/shared_mutex_overhead.cpp b/tests/performance/local/shared_mutex_overhead.cpp index cb06faa497c7..7a217cb7c56e 100644 --- a/tests/performance/local/shared_mutex_overhead.cpp +++ b/tests/performance/local/shared_mutex_overhead.cpp @@ -14,6 +14,7 @@ #include #include #include +#include std::uint64_t num_iterations = 100000; std::uint64_t reader_threads = 4; From cb7f0afd856f062e0960428971fd26bd172e3149 Mon Sep 17 00:00:00 2001 From: arpittkhandelwal Date: Wed, 6 May 2026 08:40:33 +0530 Subject: [PATCH 12/12] style: Fix clang-format issues in shared_mutex.hpp and shared_mutex_overhead.cpp --- .../include/hpx/synchronization/shared_mutex.hpp | 1 - tests/performance/local/shared_mutex_overhead.cpp | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp b/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp index b0c78c862937..7997060ae0bf 100644 --- a/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp +++ b/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp @@ -139,7 +139,6 @@ namespace hpx::detail { return true; } - void unlock_shared() { auto s = state.load(std::memory_order_acquire); diff --git a/tests/performance/local/shared_mutex_overhead.cpp b/tests/performance/local/shared_mutex_overhead.cpp index 7a217cb7c56e..c15f9c4a442b 100644 --- a/tests/performance/local/shared_mutex_overhead.cpp +++ b/tests/performance/local/shared_mutex_overhead.cpp @@ -13,8 +13,8 @@ #include #include -#include #include +#include std::uint64_t num_iterations = 100000; std::uint64_t reader_threads = 4;