Skip to content

Commit d0ae7e2

Browse files
committed
Implement a thread_local emulation for iOS devices
1 parent d2c399e commit d0ae7e2

File tree

4 files changed

+155
-15
lines changed

4 files changed

+155
-15
lines changed

Diff for: README.md

+1
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ The only requirement to use Async++ is a C++11 compiler and standard library. Un
7171

7272
- Linux: Works with GCC 4.7+, Clang 3.2+ and Intel compiler 15+.
7373
- Mac: Works with Apple Clang (using libc++). GCC also works but you must get a recent version (4.7+).
74+
- iOS: Works with Apple Clang (using libc++). Note: because iOS has no thread local support, the library uses a workaround based on pthreads.
7475
- Windows: Works with GCC 4.8+ (with pthread-win32) and Visual Studio 2013+.
7576

7677
Building and Installing

Diff for: src/internal.h

+13
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,19 @@
6868
# define BROKEN_JOIN_IN_DESTRUCTOR
6969
#endif
7070

71+
// Apple's iOS has no thread local support yet. They claim that they don't want to
72+
// introduce a binary compatibility issue once they have a better implementation available.
73+
// Luckily, pthreads supports some kind of "emulation" for that. This detects if we
74+
// are compiling for iOS and enables the workaround accordingly.
75+
// It is also possible to force-enable it by defining the EMULATE_PTHREAD_THREAD_LOCAL
76+
// macro. Obviously, this will only work on platforms where pthreads is available.
77+
#if __APPLE__
78+
# include "TargetConditionals.h"
79+
# if TARGET_IPHONE_SIMULATOR || TARGET_OS_IPHONE
80+
# define EMULATE_PTHREAD_THREAD_LOCAL
81+
# endif
82+
#endif
83+
7184
// Force symbol visibility to hidden unless explicitly exported
7285
#if defined(__GNUC__) && !defined(_WIN32)
7386
# pragma GCC visibility push(hidden)

Diff for: src/scheduler.cpp

+59-2
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@
2020

2121
#include "internal.h"
2222

23+
// for pthread thread_local emulation
24+
#if defined(EMULATE_PTHREAD_THREAD_LOCAL)
25+
# include <pthread.h>
26+
#endif
27+
2328
namespace async {
2429
namespace detail {
2530

@@ -64,13 +69,65 @@ static void generic_wait_handler(task_wait_handle wait_task)
6469
thread_event.wait();
6570
}
6671

72+
#if defined(EMULATE_PTHREAD_THREAD_LOCAL)
6773
// Wait handler function, per-thread, defaults to generic version
74+
// Owns the pthread key used to emulate the per-thread wait handler variable.
// The key is created when the object is constructed and deleted when it is
// destroyed. No per-value destructor is registered with the key (nullptr is
// passed), so pthreads never frees the values stored under it.
struct pthread_emulation_thread_wait_handler_key_initializer {
	pthread_emulation_thread_wait_handler_key_initializer()
	{
		// nullptr: no cleanup callback for values stored under this key
		pthread_key_create(&key, nullptr);
	}

	~pthread_emulation_thread_wait_handler_key_initializer()
	{
		pthread_key_delete(key);
	}

	// The key handed to pthread_getspecific / pthread_setspecific
	pthread_key_t key;
};
87+
88+
static pthread_key_t get_thread_wait_handler_key()
89+
{
90+
static pthread_emulation_thread_wait_handler_key_initializer initializer;
91+
return initializer.key;
92+
}
93+
94+
#else
6895
static THREAD_LOCAL wait_handler thread_wait_handler = generic_wait_handler;
96+
#endif
97+
98+
// Installs `handler` as the wait handler of the calling thread.
static void set_thread_wait_handler(wait_handler handler)
{
#if defined(EMULATE_PTHREAD_THREAD_LOCAL)
	// The pthread key is created lazily, so always obtain it through its
	// accessor: that guarantees the key exists before a value is stored.
	const pthread_key_t slot = get_thread_wait_handler_key();

	// pthread-specific storage holds void*, so the function pointer is
	// stored in type-erased form and cast back when it is read.
	void* const erased_handler = reinterpret_cast<void*>(handler);
	pthread_setspecific(slot, erased_handler);
#else
	thread_wait_handler = handler;
#endif
}
109+
110+
// Returns the wait handler of the calling thread. On the emulated path a
// thread that has never stored a handler (or stored a null one) gets
// generic_wait_handler.
static wait_handler get_thread_wait_handler()
{
#if defined(EMULATE_PTHREAD_THREAD_LOCAL)
	// The pthread key is created lazily, so always obtain it through its
	// accessor: that guarantees the key exists before it is queried.
	void* const stored = pthread_getspecific(get_thread_wait_handler_key());

	// A null slot means nothing was stored for this thread: use the default.
	return stored != nullptr ? reinterpret_cast<wait_handler>(stored) : generic_wait_handler;
#else
	return thread_wait_handler;
#endif
}
69125

70126
// Wait for a task to complete
71127
void wait_for_task(task_base* wait_task)
72128
{
73129
// Dispatch to the current thread's wait handler
130+
wait_handler thread_wait_handler = get_thread_wait_handler();
74131
thread_wait_handler(task_wait_handle(wait_task));
75132
}
76133

@@ -175,8 +232,8 @@ std::size_t hardware_concurrency() LIBASYNC_NOEXCEPT
175232

176233
wait_handler set_thread_wait_handler(wait_handler handler) LIBASYNC_NOEXCEPT
177234
{
178-
wait_handler old = detail::thread_wait_handler;
179-
detail::thread_wait_handler = handler;
235+
wait_handler old = detail::get_thread_wait_handler();
236+
detail::set_thread_wait_handler(handler);
180237
return old;
181238
}
182239

Diff for: src/threadpool_scheduler.cpp

+82-13
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@
2525
#include <windows.h>
2626
#endif
2727

28+
// for pthread thread_local emulation
29+
#if defined(EMULATE_PTHREAD_THREAD_LOCAL)
30+
# include <pthread.h>
31+
#endif
32+
2833
namespace async {
2934
namespace detail {
3035

@@ -65,14 +70,76 @@ struct threadpool_data {
6570
#endif
6671
};
6772

73+
// this wrapper encapsulates both the owning_threadpool pointer and the thread id.
74+
// this is done to improve performance on the emulated thread_local reducing the number
75+
// of calls to "pthread_getspecific"
76+
struct threadpool_data_wrapper {
77+
threadpool_data* owning_threadpool;
78+
std::size_t thread_id;
79+
80+
threadpool_data_wrapper(threadpool_data* owning_threadpool, std::size_t thread_id):
81+
owning_threadpool(owning_threadpool), thread_id(thread_id) { }
82+
};
83+
84+
#if defined(EMULATE_PTHREAD_THREAD_LOCAL)
85+
struct pthread_emulation_threadpool_data_initializer {
86+
pthread_key_t key;
87+
88+
pthread_emulation_threadpool_data_initializer()
89+
{
90+
pthread_key_create(&key, [](void* wrapper_ptr) {
91+
threadpool_data_wrapper* wrapper = static_cast<threadpool_data_wrapper*>(wrapper_ptr);
92+
delete wrapper;
93+
});
94+
}
95+
96+
~pthread_emulation_threadpool_data_initializer()
97+
{
98+
pthread_key_delete(key);
99+
}
100+
};
101+
102+
static pthread_key_t get_local_threadpool_data_key()
103+
{
104+
static pthread_emulation_threadpool_data_initializer initializer;
105+
return initializer.key;
106+
}
107+
108+
#else
68109
// Thread pool this thread belongs to, or null if not in pool
69110
static THREAD_LOCAL threadpool_data* owning_threadpool = nullptr;
70111

71112
// Current thread's index in the pool
72113
static THREAD_LOCAL std::size_t thread_id;
114+
#endif
115+
116+
// Records, for the calling thread, the thread pool it belongs to and its
// index inside that pool.
//
// owning_threadpool_: pool the calling thread is a worker of
// thread_id_:         index of the calling thread within that pool
//
// With EMULATE_PTHREAD_THREAD_LOCAL the two values are stored in a
// heap-allocated threadpool_data_wrapper attached to the thread through
// pthread-specific storage; otherwise they are written to the THREAD_LOCAL
// variables owning_threadpool and thread_id.
static void create_threadpool_data(threadpool_data* owning_threadpool_, std::size_t thread_id_)
{
#if defined(EMULATE_PTHREAD_THREAD_LOCAL)
	const pthread_key_t key = get_local_threadpool_data_key();

	// Remember any wrapper already attached to this thread: the cleanup
	// callback registered with the key releases only the value that is
	// still stored when the thread exits, so a replaced wrapper would
	// otherwise leak.
	threadpool_data_wrapper* previous = static_cast<threadpool_data_wrapper*>(pthread_getspecific(key));
	threadpool_data_wrapper* fresh = new threadpool_data_wrapper(owning_threadpool_, thread_id_);

	if (pthread_setspecific(key, fresh) == 0) {
		// Ownership of `fresh` now belongs to the key's cleanup callback
		delete previous;
	} else {
		// The slot was not updated, so nothing will ever free `fresh`
		delete fresh;
	}
#else
	// The parameters carry a trailing underscore so they do not shadow the
	// thread-local variables being assigned here.
	owning_threadpool = owning_threadpool_;
	thread_id = thread_id_;
#endif
}
126+
127+
// Returns a copy of the calling thread's pool membership data: the owning
// pool and the thread's index in it.
static threadpool_data_wrapper get_threadpool_data_wrapper()
{
#if defined(EMULATE_PTHREAD_THREAD_LOCAL)
	void* const stored = pthread_getspecific(get_local_threadpool_data_key());
	if (stored != nullptr) {
		return *static_cast<threadpool_data_wrapper*>(stored);
	}

	// Nothing has been stored for this thread: report "no pool, index 0"
	// instead of dereferencing a null pointer.
	return threadpool_data_wrapper(nullptr, 0);
#else
	return threadpool_data_wrapper(owning_threadpool, thread_id);
#endif
}
73140

74141
// Try to steal a task from another thread's queue
75-
static task_run_handle steal_task(threadpool_data* impl)
142+
static task_run_handle steal_task(threadpool_data* impl, std::size_t thread_id)
76143
{
77144
// Make a list of victim thread ids and shuffle it
78145
std::vector<std::size_t> victims(impl->thread_data.size());
@@ -97,10 +164,10 @@ static task_run_handle steal_task(threadpool_data* impl)
97164

98165
// Main task stealing loop which is used by worker threads when they have
99166
// nothing to do.
100-
static void thread_task_loop(threadpool_data* impl, task_wait_handle wait_task)
167+
static void thread_task_loop(threadpool_data* impl, std::size_t thread_id, task_wait_handle wait_task)
101168
{
102169
// Get our thread's data
103-
thread_data_t& current_thread = owning_threadpool->thread_data[thread_id];
170+
thread_data_t& current_thread = impl->thread_data[thread_id];
104171

105172
// Flag indicating if we have added a continuation to the task
106173
bool added_continuation = false;
@@ -121,7 +188,7 @@ static void thread_task_loop(threadpool_data* impl, task_wait_handle wait_task)
121188
// Stealing loop
122189
while (true) {
123190
// Try to steal a task
124-
if (task_run_handle t = steal_task(impl)) {
191+
if (task_run_handle t = steal_task(impl, thread_id)) {
125192
t.run();
126193
break;
127194
}
@@ -189,26 +256,26 @@ static void thread_task_loop(threadpool_data* impl, task_wait_handle wait_task)
189256
// Wait for a task to complete (for worker threads inside thread pool)
190257
static void threadpool_wait_handler(task_wait_handle wait_task)
191258
{
192-
thread_task_loop(owning_threadpool, wait_task);
259+
threadpool_data_wrapper wrapper = get_threadpool_data_wrapper();
260+
thread_task_loop(wrapper.owning_threadpool, wrapper.thread_id, wait_task);
193261
}
194262

195263
// Worker thread main loop
196-
static void worker_thread(threadpool_data* impl, std::size_t id)
264+
static void worker_thread(threadpool_data* owning_threadpool, std::size_t thread_id)
197265
{
198-
// Save the thread id and owning threadpool
199-
owning_threadpool = impl;
200-
thread_id = id;
266+
// store on the local thread data
267+
create_threadpool_data(owning_threadpool, thread_id);
201268

202269
// Set the wait handler so threads from the pool do useful work while
203270
// waiting for another task to finish.
204271
set_thread_wait_handler(threadpool_wait_handler);
205272

206273
// Seed the random number generator with our id. This gives each thread a
207274
// different steal order.
208-
impl->thread_data[thread_id].rng.seed(static_cast<std::minstd_rand::result_type>(thread_id));
275+
owning_threadpool->thread_data[thread_id].rng.seed(static_cast<std::minstd_rand::result_type>(thread_id));
209276

210277
// Main loop, runs until the shutdown signal is received
211-
thread_task_loop(impl, task_wait_handle());
278+
thread_task_loop(owning_threadpool, thread_id, task_wait_handle());
212279
}
213280

214281
// Recursive function to spawn all worker threads in parallel
@@ -296,10 +363,12 @@ threadpool_scheduler::~threadpool_scheduler()
296363
// Schedule a task on the thread pool
297364
void threadpool_scheduler::schedule(task_run_handle t)
298365
{
366+
detail::threadpool_data_wrapper wrapper = detail::get_threadpool_data_wrapper();
367+
299368
// Check if we are in the thread pool
300-
if (detail::owning_threadpool == impl.get()) {
369+
if (wrapper.owning_threadpool == impl.get()) {
301370
// Push the task onto our task queue
302-
impl->thread_data[detail::thread_id].queue.push(std::move(t));
371+
impl->thread_data[wrapper.thread_id].queue.push(std::move(t));
303372

304373
// If there are no sleeping threads, just return. We check outside the
305374
// lock to avoid locking overhead in the fast path.

0 commit comments

Comments
 (0)