Skip to content

Commit d793091

Browse files
wjakobvfdev-5
andauthored
Fix race condition in free-threaded Python (fixes issue #867) (#887)
* Fix race condition in free-threaded Python (fixes issue #867) This commit addresses an issue arising when multiple threads want to access the Python object associated with the same C++ instance, which does not exist yet and therefore must be created. @vfdev-5 reported that TSAN detects a race condition in code that uses this pattern, caused by concurrent unprotected reads/writes of internal ``nb_inst`` fields. To fix this issue, we split instance creation and registration into a two-step process. The latter is only done when the object is fully constructed. * Added test case for issue #867 --------- Co-authored-by: vfdev-5 <[email protected]>
1 parent 89c40e1 commit d793091

File tree

4 files changed

+74
-12
lines changed

4 files changed

+74
-12
lines changed

docs/free_threaded.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,11 @@ supplemental locking. The :ref:`next section <free-threaded-locks>` explains a
103103
Python-specific locking primitive that can be used in binding code besides
104104
the solutions mentioned above.
105105

106+
Multi-threaded code that concurrently returns the same C++ instance via the
107+
:cpp:enumerator:`nb::rv_policy::reference` policy may observe situations, where
108+
multiple Python objects are created that all wrap the same C++ instance
109+
(however, this is harmless aside from the duplication).
110+
106111
.. _free-threaded-locks:
107112

108113
Python locks

src/nb_type.cpp

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,8 @@ PyObject *inst_new_int(PyTypeObject *tp, PyObject * /* args */,
9999
self->clear_keep_alive = 0;
100100
self->intrusive = intrusive;
101101
self->unused = 0;
102+
103+
// Make the object compatible with nb_try_inc_ref (free-threaded builds only)
102104
nb_enable_try_inc_ref((PyObject *) self);
103105

104106
// Update hash table that maps from C++ to Python instance
@@ -111,7 +113,9 @@ PyObject *inst_new_int(PyTypeObject *tp, PyObject * /* args */,
111113
return (PyObject *) self;
112114
}
113115

114-
/// Allocate memory for a nb_type instance with external storage
116+
/// Allocate memory for a nb_type instance with external storage. In contrast to
117+
/// 'inst_new_int()', this does not yet register the instance in the internal
118+
/// data structures. The function 'inst_register()' must be used to do so.
115119
PyObject *inst_new_ext(PyTypeObject *tp, void *value) {
116120
bool gc = PyType_HasFeature(tp, Py_TPFLAGS_HAVE_GC);
117121

@@ -165,13 +169,20 @@ PyObject *inst_new_ext(PyTypeObject *tp, void *value) {
165169
self->clear_keep_alive = 0;
166170
self->intrusive = intrusive;
167171
self->unused = 0;
172+
173+
// Make the object compatible with nb_try_inc_ref (free-threaded builds only)
168174
nb_enable_try_inc_ref((PyObject *) self);
169175

176+
return (PyObject *) self;
177+
}
178+
179+
/// Register the object constructed by 'inst_new_ext()' in the internal data structures
180+
static void inst_register(PyObject *inst, void *value) noexcept {
170181
nb_shard &shard = internals->shard(value);
171182
lock_shard guard(shard);
172183

173184
// Update hash table that maps from C++ to Python instance
174-
auto [it, success] = shard.inst_c2p.try_emplace(value, self);
185+
auto [it, success] = shard.inst_c2p.try_emplace(value, inst);
175186

176187
if (NB_UNLIKELY(!success)) {
177188
void *entry = it->second;
@@ -188,8 +199,9 @@ PyObject *inst_new_ext(PyTypeObject *tp, void *value) {
188199

189200
nb_inst_seq *seq = nb_get_seq(entry);
190201
while (true) {
191-
check((nb_inst *) seq->inst != self,
192-
"nanobind::detail::inst_new_ext(): duplicate instance!");
202+
// The following should never happen
203+
check(inst != seq->inst, "nanobind::detail::inst_new_ext(): duplicate instance!");
204+
193205
if (!seq->next)
194206
break;
195207
seq = seq->next;
@@ -199,14 +211,13 @@ PyObject *inst_new_ext(PyTypeObject *tp, void *value) {
199211
check(next,
200212
"nanobind::detail::inst_new_ext(): list element allocation failed!");
201213

202-
next->inst = (PyObject *) self;
214+
next->inst = (PyObject *) inst;
203215
next->next = nullptr;
204216
seq->next = next;
205217
}
206-
207-
return (PyObject *) self;
208218
}
209219

220+
210221
static void inst_dealloc(PyObject *self) {
211222
PyTypeObject *tp = Py_TYPE(self);
212223
const type_data *t = nb_type_data(tp);
@@ -1737,6 +1748,9 @@ static PyObject *nb_type_put_common(void *value, type_data *t, rv_policy rvp,
17371748
if (intrusive)
17381749
t->set_self_py(new_value, (PyObject *) inst);
17391750

1751+
if (!create_new)
1752+
inst_register((PyObject *) inst, value);
1753+
17401754
return (PyObject *) inst;
17411755
}
17421756

@@ -2082,6 +2096,7 @@ PyObject *nb_inst_reference(PyTypeObject *t, void *ptr, PyObject *parent) {
20822096
nbi->state = nb_inst::state_ready;
20832097
if (parent)
20842098
keep_alive(result, parent);
2099+
inst_register(result, ptr);
20852100
return result;
20862101
}
20872102

@@ -2092,6 +2107,7 @@ PyObject *nb_inst_take_ownership(PyTypeObject *t, void *ptr) {
20922107
nb_inst *nbi = (nb_inst *) result;
20932108
nbi->destruct = nbi->cpp_delete = true;
20942109
nbi->state = nb_inst::state_ready;
2110+
inst_register(result, ptr);
20952111
return result;
20962112
}
20972113

tests/test_thread.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,23 @@ struct GlobalData {} global_data;
1616

1717
nb::ft_mutex mutex;
1818

19+
struct ClassWithProperty {
20+
public:
21+
ClassWithProperty(int value): value_(value) {}
22+
int get_prop() const { return value_; }
23+
private:
24+
int value_;
25+
};
26+
27+
class ClassWithClassProperty {
28+
public:
29+
ClassWithClassProperty(ClassWithProperty value) : value_(std::move(value)) {};
30+
const ClassWithProperty& get_prop() const { return value_; }
31+
private:
32+
ClassWithProperty value_;
33+
};
34+
35+
1936
NB_MODULE(test_thread_ext, m) {
2037
nb::class_<Counter>(m, "Counter")
2138
.def(nb::init<>())
@@ -39,4 +56,16 @@ NB_MODULE(test_thread_ext, m) {
3956

4057
nb::class_<GlobalData>(m, "GlobalData")
4158
.def_static("get", [] { return &global_data; }, nb::rv_policy::reference);
59+
60+
nb::class_<ClassWithProperty>(m, "ClassWithProperty")
61+
.def(nb::init<int>(), nb::arg("value"))
62+
.def_prop_ro("prop2", &ClassWithProperty::get_prop);
63+
64+
nb::class_<ClassWithClassProperty>(m, "ClassWithClassProperty")
65+
.def(
66+
"__init__",
67+
[](ClassWithClassProperty* self, ClassWithProperty value) {
68+
new (self) ClassWithClassProperty(std::move(value));
69+
}, nb::arg("value"))
70+
.def_prop_ro("prop1", &ClassWithClassProperty::get_prop);
4271
}

tests/test_thread.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import test_thread_ext as t
2-
from test_thread_ext import Counter, GlobalData
2+
from test_thread_ext import Counter, GlobalData, ClassWithProperty, ClassWithClassProperty
33
from common import parallelize
44

55
def test01_object_creation(n_threads=8):
@@ -29,7 +29,7 @@ def test02_global_lock(n_threads=8):
2929
n = 100000
3030
c = Counter()
3131
def f():
32-
for i in range(n):
32+
for _ in range(n):
3333
t.inc_global(c)
3434

3535
parallelize(f, n_threads=n_threads)
@@ -53,7 +53,7 @@ def test04_locked_function(n_threads=8):
5353
n = 100000
5454
c = Counter()
5555
def f():
56-
for i in range(n):
56+
for _ in range(n):
5757
t.inc_safe(c)
5858

5959
parallelize(f, n_threads=n_threads)
@@ -77,14 +77,26 @@ def f():
7777
assert c.value == n * n_threads
7878

7979

80-
def test_06_global_wrapper(n_threads=8):
80+
def test06_global_wrapper(n_threads=8):
8181
# Check wrapper lookup racing with wrapper deallocation
8282
n = 10000
8383
def f():
84-
for i in range(n):
84+
for _ in range(n):
8585
GlobalData.get()
8686
GlobalData.get()
8787
GlobalData.get()
8888
GlobalData.get()
8989

9090
parallelize(f, n_threads=n_threads)
91+
92+
93+
def test07_access_attributes(n_threads=8):
94+
n = 1000
95+
c1 = ClassWithProperty(123)
96+
c2 = ClassWithClassProperty(c1)
97+
98+
def f():
99+
for i in range(n):
100+
_ = c2.prop1.prop2
101+
102+
parallelize(f, n_threads=n_threads)

0 commit comments

Comments
 (0)