From 20f0d24adec6160401d294cd0dd6d4b6bdfc56be Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Tue, 14 Jan 2025 11:36:43 +0100 Subject: [PATCH 1/5] immortalize global strings in order to prevent negative refcounts --- src/_uarray_dispatch.cxx | 93 +++++++++++++++++++++++++--------------- 1 file changed, 59 insertions(+), 34 deletions(-) diff --git a/src/_uarray_dispatch.cxx b/src/_uarray_dispatch.cxx index 5333b2d..3bfc169 100644 --- a/src/_uarray_dispatch.cxx +++ b/src/_uarray_dispatch.cxx @@ -1,8 +1,8 @@ +#include + #include "small_dynamic_array.h" #include "vectorcall.h" -#include - #include #include #include @@ -15,6 +15,32 @@ namespace { +template +class immortal { + alignas(T) std::byte storage[sizeof(T)]; + +public: + template + immortal(Args&&... args) { + // Construct new T in storage + new(&storage) T(std::forward(args)...); + } + ~immortal() { + // Intentionally don't call destructor + } + + T* get() { return reinterpret_cast(&storage); } + const T* get() const { return reinterpret_cast(&storage); } + const T* get_const() const { return reinterpret_cast(&storage); } + + const T* operator ->() const { return get(); } + T* operator ->() { return get(); } + + T& operator*() { return *get(); } + const T& operator*() const { return *get(); } + +}; + /** Handle to a python object that automatically DECREFs */ class py_ref { explicit py_ref(PyObject * object): obj_(object) {} @@ -129,8 +155,8 @@ using global_state_t = std::unordered_map; using local_state_t = std::unordered_map; static py_ref BackendNotImplementedError; -static global_state_t global_domain_map; -thread_local global_state_t * current_global_state = &global_domain_map; +static immortal global_domain_map; +thread_local global_state_t * current_global_state = global_domain_map.get(); thread_local global_state_t thread_local_domain_map; thread_local local_state_t local_domain_map; @@ -140,30 +166,30 @@ Using these with PyObject_GetAttr is faster than PyObject_GetAttrString which has to create a new python string internally. */ struct { - py_ref ua_convert; - py_ref ua_domain; - py_ref ua_function; + immortal ua_convert; + immortal ua_domain; + immortal ua_function; bool init() { - ua_convert = py_ref::steal(PyUnicode_InternFromString("__ua_convert__")); - if (!ua_convert) + *ua_convert = py_ref::steal(PyUnicode_InternFromString("__ua_convert__")); + if (!*ua_convert) return false; - ua_domain = py_ref::steal(PyUnicode_InternFromString("__ua_domain__")); - if (!ua_domain) + *ua_domain = py_ref::steal(PyUnicode_InternFromString("__ua_domain__")); + if (!*ua_domain) return false; - ua_function = py_ref::steal(PyUnicode_InternFromString("__ua_function__")); - if (!ua_function) + *ua_function = py_ref::steal(PyUnicode_InternFromString("__ua_function__")); + if (!*ua_function) return false; return true; } void clear() { - ua_convert.reset(); - ua_domain.reset(); - ua_function.reset(); + ua_convert->reset(); + ua_domain->reset(); + ua_function->reset(); } } identifiers; @@ -202,7 +228,7 @@ std::string domain_to_string(PyObject * domain) { Py_ssize_t backend_get_num_domains(PyObject * backend) { auto domain = - py_ref::steal(PyObject_GetAttr(backend, identifiers.ua_domain.get())); + py_ref::steal(PyObject_GetAttr(backend, identifiers.ua_domain->get())); if (!domain) return -1; @@ -225,7 +251,7 @@ enum class LoopReturn { Continue, Break, Error }; template LoopReturn backend_for_each_domain(PyObject * backend, Func f) { auto domain = - py_ref::steal(PyObject_GetAttr(backend, identifiers.ua_domain.get())); + py_ref::steal(PyObject_GetAttr(backend, identifiers.ua_domain->get())); if (!domain) return LoopReturn::Error; @@ -537,7 +563,7 @@ struct BackendState { /** Clean up global python references when the module is finalized. */ void globals_free(void * /* self */) { - global_domain_map.clear(); + global_domain_map->clear(); BackendNotImplementedError.reset(); identifiers.clear(); } @@ -550,7 +576,7 @@ void globals_free(void * /* self */) { * cleanup. */ int globals_traverse(PyObject * self, visitproc visit, void * arg) { - for (const auto & kv : global_domain_map) { + for (const auto & kv : *global_domain_map) { const auto & globals = kv.second; PyObject * backend = globals.global.backend.get(); Py_VISIT(backend); @@ -563,7 +589,7 @@ int globals_traverse(PyObject * self, visitproc visit, void * arg) { } int globals_clear(PyObject * /* self */) { - global_domain_map.clear(); + global_domain_map->clear(); return 0; } @@ -1170,7 +1196,7 @@ py_ref Function::canonicalize_kwargs(PyObject * kwargs) { py_func_args Function::replace_dispatchables( PyObject * backend, PyObject * args, PyObject * kwargs, PyObject * coerce) { - auto has_ua_convert = PyObject_HasAttr(backend, identifiers.ua_convert.get()); + auto has_ua_convert = PyObject_HasAttr(backend, identifiers.ua_convert->get()); if (!has_ua_convert) { return {py_ref::ref(args), py_ref::ref(kwargs)}; } @@ -1182,7 +1208,7 @@ py_func_args Function::replace_dispatchables( PyObject * convert_args[] = {backend, dispatchables.get(), coerce}; auto res = py_ref::steal(Q_PyObject_VectorcallMethod( - identifiers.ua_convert.get(), convert_args, + identifiers.ua_convert->get(), convert_args, array_size(convert_args) | Q_PY_VECTORCALL_ARGUMENTS_OFFSET, nullptr)); if (!res) { return {}; @@ -1287,7 +1313,7 @@ PyObject * Function::call(PyObject * args_, PyObject * kwargs_) { backend, reinterpret_cast(this), new_args.args.get(), new_args.kwargs.get()}; result = py_ref::steal(Q_PyObject_VectorcallMethod( - identifiers.ua_function.get(), args, + identifiers.ua_function->get(), args, array_size(args) | Q_PY_VECTORCALL_ARGUMENTS_OFFSET, nullptr)); // raise BackendNotImplemeted is equivalent to return NotImplemented @@ -1499,7 +1525,7 @@ PyObject * get_state(PyObject * /* self */, PyObject * /* args */) { output->locals = local_domain_map; output->use_thread_local_globals = - (current_global_state != &global_domain_map); + (current_global_state != global_domain_map.get()); output->globals = *current_global_state; return ref.release(); @@ -1523,7 +1549,7 @@ PyObject * set_state(PyObject * /* self */, PyObject * args) { bool use_thread_local_globals = (!reset_allowed) || state->use_thread_local_globals; current_global_state = - use_thread_local_globals ? &thread_local_domain_map : &global_domain_map; + use_thread_local_globals ? &thread_local_domain_map : global_domain_map.get(); if (use_thread_local_globals) thread_local_domain_map = state->globals; @@ -1554,7 +1580,7 @@ PyObject * determine_backend(PyObject * /*self*/, PyObject * args) { auto result = for_each_backend_in_domain( domain, [&](PyObject * backend, bool coerce_backend) { auto has_ua_convert = - PyObject_HasAttr(backend, identifiers.ua_convert.get()); + PyObject_HasAttr(backend, identifiers.ua_convert->get()); if (!has_ua_convert) { // If no __ua_convert__, assume it won't accept the type @@ -1566,7 +1592,7 @@ PyObject * determine_backend(PyObject * /*self*/, PyObject * args) { (coerce && coerce_backend) ? Py_True : Py_False}; auto res = py_ref::steal(Q_PyObject_VectorcallMethod( - identifiers.ua_convert.get(), convert_args, + identifiers.ua_convert->get(), convert_args, array_size(convert_args) | Q_PY_VECTORCALL_ARGUMENTS_OFFSET, nullptr)); if (!res) { @@ -1775,13 +1801,8 @@ PyModuleDef uarray_module = { } // namespace -#if defined(WIN32) || defined(_WIN32) -# define MODULE_EXPORT __declspec(dllexport) -#else -# define MODULE_EXPORT __attribute__((visibility("default"))) -#endif -extern "C" MODULE_EXPORT PyObject * PyInit__uarray(void) { +PyMODINIT_FUNC PyInit__uarray(void) { auto m = py_ref::steal(PyModule_Create(&uarray_module)); if (!m) @@ -1823,5 +1844,9 @@ extern "C" MODULE_EXPORT PyObject * PyInit__uarray(void) { if (!identifiers.init()) return nullptr; +#if Py_GIL_DISABLED + PyUnstable_Module_SetGIL(m.get(), Py_MOD_GIL_NOT_USED); +#endif + return m.release(); } From 3748df4899e03735e230869741433c0e255ed86c Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Tue, 14 Jan 2025 11:38:51 +0100 Subject: [PATCH 2/5] Add dependabot. --- .github/dependabot.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..840b6bc --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,13 @@ +# Set update schedule for GitHub Actions +# This opens a PR when actions in workflows need an update + +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + # Check for updates to GitHub Actions every week + interval: "weekly" + commit-message: + prefix: "skip changelog" # So this PR will not be added to release-drafter + include: "scope" # List of the updated dependencies in the commit will be added From 139c522cd360d3c165efb5f370bc61f231a696d5 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Tue, 14 Jan 2025 11:40:08 +0100 Subject: [PATCH 3/5] Update `cibuildwheel`. --- .github/workflows/build.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 627b3c7..648b16d 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ jobs: fetch-depth: 0 - name: Build wheels - uses: pypa/cibuildwheel@v2.20.0 + uses: pypa/cibuildwheel@v2.22.0 - uses: actions/upload-artifact@v4 with: name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }} From 4adb130401ebb39d55580c89fa9a54a04fa903cb Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Tue, 14 Jan 2025 11:56:14 +0100 Subject: [PATCH 4/5] Move to C++17. --- pyproject.toml | 3 ++- src/CMakeLists.txt | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1d989b5..a00bee6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,7 +73,8 @@ disallow_any_unimported = false build = "{cp3{10..13}-*,pp310-*}" build-frontend = "build" free-threaded-support = true -test-command = "pip install -r {project}/requirements/tests.txt && pytest --pyargs uarray" +before-test = "pip install -r {project}/requirements/tests.txt" +test-command = "pytest --pyargs uarray" [tool.cibuildwheel.macos] archs = ["universal2"] diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 47f25a6..1171386 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,4 +1,6 @@ cmake_minimum_required(VERSION 3.15...3.30) +set(CMAKE_OSX_DEPLOYMENT_TARGET "10.13" CACHE STRING "Minimum OS X deployment version") + project(${SKBUILD_PROJECT_NAME} LANGUAGES CXX) find_package(Python COMPONENTS Interpreter Development.Module REQUIRED) @@ -9,5 +11,5 @@ Python_add_library(_uarray MODULE vectorcall.cxx _uarray_dispatch.cxx WITH_SOABI) -set_property(TARGET _uarray PROPERTY CXX_STANDARD 11) +set_property(TARGET _uarray PROPERTY CXX_STANDARD 17) install(TARGETS _uarray LIBRARY DESTINATION uarray) From 29bae473dd903ba1db1f206e839ccfb509990dd7 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Tue, 14 Jan 2025 11:59:00 +0100 Subject: [PATCH 5/5] `clang-format` --- .github/workflows/build.yaml | 2 +- pyproject.toml | 5 +++++ src/CMakeLists.txt | 1 - src/_uarray_dispatch.cxx | 42 ++++++++++++++++++------------------ 4 files changed, 27 insertions(+), 23 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 648b16d..a16a6be 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -97,7 +97,7 @@ jobs: - name: Run clang-format style check for C/C++ code. uses: jidicula/clang-format-action@v4.13.0 with: - clang-format-version: '18' + clang-format-version: '19' check-path: 'src' - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 diff --git a/pyproject.toml b/pyproject.toml index a00bee6..e81c0ae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,6 +79,11 @@ test-command = "pytest --pyargs uarray" [tool.cibuildwheel.macos] archs = ["universal2"] +[[tool.cibuildwheel.overrides]] +select = "*-macosx_*" +inherit.environment = "append" +environment.MACOSX_DEPLOYMENT_TARGET = "10.13" + [tool.cibuildwheel.config-settings] "cmake.verbose" = true "logging.level" = "INFO" diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1171386..b915233 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,5 +1,4 @@ cmake_minimum_required(VERSION 3.15...3.30) -set(CMAKE_OSX_DEPLOYMENT_TARGET "10.13" CACHE STRING "Minimum OS X deployment version") project(${SKBUILD_PROJECT_NAME} LANGUAGES CXX) diff --git a/src/_uarray_dispatch.cxx b/src/_uarray_dispatch.cxx index 3bfc169..38aba72 100644 --- a/src/_uarray_dispatch.cxx +++ b/src/_uarray_dispatch.cxx @@ -17,28 +17,27 @@ namespace { template class immortal { - alignas(T) std::byte storage[sizeof(T)]; + alignas(T) std::byte storage[sizeof(T)]; public: - template - immortal(Args&&... args) { - // Construct new T in storage - new(&storage) T(std::forward(args)...); - } - ~immortal() { - // Intentionally don't call destructor - } - - T* get() { return reinterpret_cast(&storage); } - const T* get() const { return reinterpret_cast(&storage); } - const T* get_const() const { return reinterpret_cast(&storage); } + template + immortal(Args &&... args) { + // Construct new T in storage + new (&storage) T(std::forward(args)...); + } + ~immortal() { + // Intentionally don't call destructor + } - const T* operator ->() const { return get(); } - T* operator ->() { return get(); } + T * get() { return reinterpret_cast(&storage); } + const T * get() const { return reinterpret_cast(&storage); } + const T * get_const() const { return reinterpret_cast(&storage); } - T& operator*() { return *get(); } - const T& operator*() const { return *get(); } + const T * operator->() const { return get(); } + T * operator->() { return get(); } + T & operator*() { return *get(); } + const T & operator*() const { return *get(); } }; /** Handle to a python object that automatically DECREFs */ @@ -1196,7 +1195,8 @@ py_ref Function::canonicalize_kwargs(PyObject * kwargs) { py_func_args Function::replace_dispatchables( PyObject * backend, PyObject * args, PyObject * kwargs, PyObject * coerce) { - auto has_ua_convert = PyObject_HasAttr(backend, identifiers.ua_convert->get()); + auto has_ua_convert = + PyObject_HasAttr(backend, identifiers.ua_convert->get()); if (!has_ua_convert) { return {py_ref::ref(args), py_ref::ref(kwargs)}; } @@ -1548,8 +1548,8 @@ PyObject * set_state(PyObject * /* self */, PyObject * args) { local_domain_map = state->locals; bool use_thread_local_globals = (!reset_allowed) || state->use_thread_local_globals; - current_global_state = - use_thread_local_globals ? &thread_local_domain_map : global_domain_map.get(); + current_global_state = use_thread_local_globals ? &thread_local_domain_map + : global_domain_map.get(); if (use_thread_local_globals) thread_local_domain_map = state->globals; @@ -1845,7 +1845,7 @@ PyMODINIT_FUNC PyInit__uarray(void) { return nullptr; #if Py_GIL_DISABLED - PyUnstable_Module_SetGIL(m.get(), Py_MOD_GIL_NOT_USED); + PyUnstable_Module_SetGIL(m.get(), Py_MOD_GIL_NOT_USED); #endif return m.release();