diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..840b6bc --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,13 @@ +# Set update schedule for GitHub Actions +# This opens a PR when actions in workflows need an update + +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + # Check for updates to GitHub Actions every week + interval: "weekly" + commit-message: + prefix: "skip changelog" # So this PR will not be added to release-drafter + include: "scope" # List of the updated dependencies in the commit will be added diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 627b3c7..a16a6be 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ jobs: fetch-depth: 0 - name: Build wheels - uses: pypa/cibuildwheel@v2.20.0 + uses: pypa/cibuildwheel@v2.22.0 - uses: actions/upload-artifact@v4 with: name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }} @@ -97,7 +97,7 @@ jobs: - name: Run clang-format style check for C/C++ code. uses: jidicula/clang-format-action@v4.13.0 with: - clang-format-version: '18' + clang-format-version: '19' check-path: 'src' - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 diff --git a/pyproject.toml b/pyproject.toml index 1d989b5..e81c0ae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,11 +73,17 @@ disallow_any_unimported = false build = "{cp3{10..13}-*,pp310-*}" build-frontend = "build" free-threaded-support = true -test-command = "pip install -r {project}/requirements/tests.txt && pytest --pyargs uarray" +before-test = "pip install -r {project}/requirements/tests.txt" +test-command = "pytest --pyargs uarray" [tool.cibuildwheel.macos] archs = ["universal2"] +[[tool.cibuildwheel.overrides]] +select = "*-macosx_*" +inherit.environment = "append" +environment.MACOSX_DEPLOYMENT_TARGET = "10.13" + [tool.cibuildwheel.config-settings] "cmake.verbose" = true "logging.level" = "INFO" diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 47f25a6..b915233 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,4 +1,5 @@ cmake_minimum_required(VERSION 3.15...3.30) + project(${SKBUILD_PROJECT_NAME} LANGUAGES CXX) find_package(Python COMPONENTS Interpreter Development.Module REQUIRED) @@ -9,5 +10,5 @@ Python_add_library(_uarray MODULE vectorcall.cxx _uarray_dispatch.cxx WITH_SOABI) -set_property(TARGET _uarray PROPERTY CXX_STANDARD 11) +set_property(TARGET _uarray PROPERTY CXX_STANDARD 17) install(TARGETS _uarray LIBRARY DESTINATION uarray) diff --git a/src/_uarray_dispatch.cxx b/src/_uarray_dispatch.cxx index 5333b2d..38aba72 100644 --- a/src/_uarray_dispatch.cxx +++ b/src/_uarray_dispatch.cxx @@ -1,8 +1,8 @@ +#include + #include "small_dynamic_array.h" #include "vectorcall.h" -#include - #include #include #include @@ -15,6 +15,31 @@ namespace { +template +class immortal { + alignas(T) std::byte storage[sizeof(T)]; + +public: + template + immortal(Args &&... args) { + // Construct new T in storage + new (&storage) T(std::forward(args)...); + } + ~immortal() { + // Intentionally don't call destructor + } + + T * get() { return reinterpret_cast(&storage); } + const T * get() const { return reinterpret_cast(&storage); } + const T * get_const() const { return reinterpret_cast(&storage); } + + const T * operator->() const { return get(); } + T * operator->() { return get(); } + + T & operator*() { return *get(); } + const T & operator*() const { return *get(); } +}; + /** Handle to a python object that automatically DECREFs */ class py_ref { explicit py_ref(PyObject * object): obj_(object) {} @@ -129,8 +154,8 @@ using global_state_t = std::unordered_map; using local_state_t = std::unordered_map; static py_ref BackendNotImplementedError; -static global_state_t global_domain_map; -thread_local global_state_t * current_global_state = &global_domain_map; +static immortal global_domain_map; +thread_local global_state_t * current_global_state = global_domain_map.get(); thread_local global_state_t thread_local_domain_map; thread_local local_state_t local_domain_map; @@ -140,30 +165,30 @@ Using these with PyObject_GetAttr is faster than PyObject_GetAttrString which has to create a new python string internally. */ struct { - py_ref ua_convert; - py_ref ua_domain; - py_ref ua_function; + immortal ua_convert; + immortal ua_domain; + immortal ua_function; bool init() { - ua_convert = py_ref::steal(PyUnicode_InternFromString("__ua_convert__")); - if (!ua_convert) + *ua_convert = py_ref::steal(PyUnicode_InternFromString("__ua_convert__")); + if (!*ua_convert) return false; - ua_domain = py_ref::steal(PyUnicode_InternFromString("__ua_domain__")); - if (!ua_domain) + *ua_domain = py_ref::steal(PyUnicode_InternFromString("__ua_domain__")); + if (!*ua_domain) return false; - ua_function = py_ref::steal(PyUnicode_InternFromString("__ua_function__")); - if (!ua_function) + *ua_function = py_ref::steal(PyUnicode_InternFromString("__ua_function__")); + if (!*ua_function) return false; return true; } void clear() { - ua_convert.reset(); - ua_domain.reset(); - ua_function.reset(); + ua_convert->reset(); + ua_domain->reset(); + ua_function->reset(); } } identifiers; @@ -202,7 +227,7 @@ std::string domain_to_string(PyObject * domain) { Py_ssize_t backend_get_num_domains(PyObject * backend) { auto domain = - py_ref::steal(PyObject_GetAttr(backend, identifiers.ua_domain.get())); + py_ref::steal(PyObject_GetAttr(backend, identifiers.ua_domain->get())); if (!domain) return -1; @@ -225,7 +250,7 @@ enum class LoopReturn { Continue, Break, Error }; template LoopReturn backend_for_each_domain(PyObject * backend, Func f) { auto domain = - py_ref::steal(PyObject_GetAttr(backend, identifiers.ua_domain.get())); + py_ref::steal(PyObject_GetAttr(backend, identifiers.ua_domain->get())); if (!domain) return LoopReturn::Error; @@ -537,7 +562,7 @@ struct BackendState { /** Clean up global python references when the module is finalized. */ void globals_free(void * /* self */) { - global_domain_map.clear(); + global_domain_map->clear(); BackendNotImplementedError.reset(); identifiers.clear(); } @@ -550,7 +575,7 @@ void globals_free(void * /* self */) { * cleanup. */ int globals_traverse(PyObject * self, visitproc visit, void * arg) { - for (const auto & kv : global_domain_map) { + for (const auto & kv : *global_domain_map) { const auto & globals = kv.second; PyObject * backend = globals.global.backend.get(); Py_VISIT(backend); @@ -563,7 +588,7 @@ int globals_traverse(PyObject * self, visitproc visit, void * arg) { } int globals_clear(PyObject * /* self */) { - global_domain_map.clear(); + global_domain_map->clear(); return 0; } @@ -1170,7 +1195,8 @@ py_ref Function::canonicalize_kwargs(PyObject * kwargs) { py_func_args Function::replace_dispatchables( PyObject * backend, PyObject * args, PyObject * kwargs, PyObject * coerce) { - auto has_ua_convert = PyObject_HasAttr(backend, identifiers.ua_convert.get()); + auto has_ua_convert = + PyObject_HasAttr(backend, identifiers.ua_convert->get()); if (!has_ua_convert) { return {py_ref::ref(args), py_ref::ref(kwargs)}; } @@ -1182,7 +1208,7 @@ py_func_args Function::replace_dispatchables( PyObject * convert_args[] = {backend, dispatchables.get(), coerce}; auto res = py_ref::steal(Q_PyObject_VectorcallMethod( - identifiers.ua_convert.get(), convert_args, + identifiers.ua_convert->get(), convert_args, array_size(convert_args) | Q_PY_VECTORCALL_ARGUMENTS_OFFSET, nullptr)); if (!res) { return {}; @@ -1287,7 +1313,7 @@ PyObject * Function::call(PyObject * args_, PyObject * kwargs_) { backend, reinterpret_cast(this), new_args.args.get(), new_args.kwargs.get()}; result = py_ref::steal(Q_PyObject_VectorcallMethod( - identifiers.ua_function.get(), args, + identifiers.ua_function->get(), args, array_size(args) | Q_PY_VECTORCALL_ARGUMENTS_OFFSET, nullptr)); // raise BackendNotImplemeted is equivalent to return NotImplemented @@ -1499,7 +1525,7 @@ PyObject * get_state(PyObject * /* self */, PyObject * /* args */) { output->locals = local_domain_map; output->use_thread_local_globals = - (current_global_state != &global_domain_map); + (current_global_state != global_domain_map.get()); output->globals = *current_global_state; return ref.release(); @@ -1522,8 +1548,8 @@ PyObject * set_state(PyObject * /* self */, PyObject * args) { local_domain_map = state->locals; bool use_thread_local_globals = (!reset_allowed) || state->use_thread_local_globals; - current_global_state = - use_thread_local_globals ? &thread_local_domain_map : &global_domain_map; + current_global_state = use_thread_local_globals ? &thread_local_domain_map + : global_domain_map.get(); if (use_thread_local_globals) thread_local_domain_map = state->globals; @@ -1554,7 +1580,7 @@ PyObject * determine_backend(PyObject * /*self*/, PyObject * args) { auto result = for_each_backend_in_domain( domain, [&](PyObject * backend, bool coerce_backend) { auto has_ua_convert = - PyObject_HasAttr(backend, identifiers.ua_convert.get()); + PyObject_HasAttr(backend, identifiers.ua_convert->get()); if (!has_ua_convert) { // If no __ua_convert__, assume it won't accept the type @@ -1566,7 +1592,7 @@ PyObject * determine_backend(PyObject * /*self*/, PyObject * args) { (coerce && coerce_backend) ? Py_True : Py_False}; auto res = py_ref::steal(Q_PyObject_VectorcallMethod( - identifiers.ua_convert.get(), convert_args, + identifiers.ua_convert->get(), convert_args, array_size(convert_args) | Q_PY_VECTORCALL_ARGUMENTS_OFFSET, nullptr)); if (!res) { @@ -1775,13 +1801,8 @@ PyModuleDef uarray_module = { } // namespace -#if defined(WIN32) || defined(_WIN32) -# define MODULE_EXPORT __declspec(dllexport) -#else -# define MODULE_EXPORT __attribute__((visibility("default"))) -#endif -extern "C" MODULE_EXPORT PyObject * PyInit__uarray(void) { +PyMODINIT_FUNC PyInit__uarray(void) { auto m = py_ref::steal(PyModule_Create(&uarray_module)); if (!m) @@ -1823,5 +1844,9 @@ extern "C" MODULE_EXPORT PyObject * PyInit__uarray(void) { if (!identifiers.init()) return nullptr; +#if Py_GIL_DISABLED + PyUnstable_Module_SetGIL(m.get(), Py_MOD_GIL_NOT_USED); +#endif + return m.release(); }