From 95e67bcb4257b49ffc29889a88a796c1060a3c17 Mon Sep 17 00:00:00 2001 From: Richard Hakim Date: Tue, 12 May 2026 13:55:08 -0400 Subject: [PATCH 1/4] Fix default convolution method without numba --- README.md | 10 +++++----- sparse_convolution/sparse_convolution.py | 7 ++++--- tests/test_unit.py | 17 +++++++++++++++++ 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 7a46b24..1586083 100644 --- a/README.md +++ b/README.md @@ -53,10 +53,10 @@ Four methods, each with selectable backends: | `lazy` | yes | n/a | yes | | `gather_scatter` | yes | yes | yes | -- **`direct`** (default): Batch-parallel scatter convolution with thread-local dense buffers (numba only). For each image in parallel, scatters kernel-weighted input values into an L2-cache-sized accumulator buffer, then extracts nonzeros into CSR format. Uses a two-phase approach: a lightweight boolean counting pass (1-byte flags, no float arithmetic) determines exact output sizes, then the scatter pass writes directly to right-sized arrays with zero waste. Interior pixels (~92-100%) skip bounds checking entirely via precomputed safe regions. O(nnz × K) per image with no init overhead. Fastest method across nearly all configurations. Requires `numba`. +- **`direct`**: Batch-parallel scatter convolution with thread-local dense buffers (numba only). For each image in parallel, scatters kernel-weighted input values into an L2-cache-sized accumulator buffer, then extracts nonzeros into CSR format. Uses a two-phase approach: a lightweight boolean counting pass (1-byte flags, no float arithmetic) determines exact output sizes, then the scatter pass writes directly to right-sized arrays with zero waste. Interior pixels (~92-100%) skip bounds checking entirely via precomputed safe regions. O(nnz × K) per image with no init overhead. Fastest method across nearly all configurations. Requires `numba`. - **`precomputed`**: Builds a sparse Toeplitz matrix at init; fast batched matmul. Best for large batches with the same kernel when numba is not available. - **`lazy`**: COO broadcasting, no init cost. Best for very sparse inputs with small batches. -- **`gather_scatter`**: Per-kernel-position scatter into a dense accumulator. General-purpose method for sparse batched inputs. +- **`gather_scatter`** (default): Per-kernel-position scatter into a dense accumulator. General-purpose method for sparse batched inputs. Uses `numba` automatically when available, and falls back to `numpy` otherwise. Backend selection: - **`numpy`**: scipy/numpy ops. Always available. @@ -68,12 +68,12 @@ conv = sc.Toeplitz_convolution2d( x_shape=(100, 100), k=k, mode='same', - method='direct', # default - backend='numba', # auto-selected for direct + method='gather_scatter', # default + backend=None, # numba if installed, otherwise numpy ) ``` -If `backend=None` (default), auto-selects `numba` for `direct` and `gather_scatter` (if installed), `numpy` otherwise. +If `backend=None` (default), `gather_scatter` auto-selects `numba` when installed and falls back to `numpy` otherwise. Use `method='direct'` explicitly for the fastest numba-only implementation. ## References - Toeplitz convolution: [stackoverflow.com/a/51865516](https://stackoverflow.com/a/51865516), [alisaaalehi/convolution_as_multiplication](https://github.com/alisaaalehi/convolution_as_multiplication) diff --git a/sparse_convolution/sparse_convolution.py b/sparse_convolution/sparse_convolution.py index 41b2eb5..bf45cf2 100644 --- a/sparse_convolution/sparse_convolution.py +++ b/sparse_convolution/sparse_convolution.py @@ -110,8 +110,9 @@ class Toeplitz_convolution2d(): * ``'direct'``: ``'numba'`` (only option) \\n If ``None``, auto-selects the best available backend: - ``'numba'`` for ``'gather_scatter'`` and ``'direct'`` (if - installed), ``'numpy'`` otherwise. + ``'numba'`` for ``'gather_scatter'`` (if installed), ``'numpy'`` + otherwise. ``'direct'`` requires ``backend='numba'`` and numba to + be installed. max_buffer_bytes (int): Maximum memory (bytes) for the dense accumulator buffer used by ``'gather_scatter'``. Controls chunk size for batch processing. @@ -144,7 +145,7 @@ def __init__( mode: str = 'same', dtype: Optional[np.dtype] = None, verbose: Union[bool, int] = False, - method: str = 'direct', + method: str = 'gather_scatter', max_buffer_bytes: int = 256 * 1024 * 1024, backend: Optional[str] = None, device: Optional[str] = None, diff --git a/tests/test_unit.py b/tests/test_unit.py index 6122021..d43aa0b 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -118,3 +118,20 @@ def test_gather_scatter_numpy_all_zero_kernel_returns_empty_sparse_output(): assert scipy.sparse.isspmatrix_csr(out) assert out.shape == x.shape assert out.nnz == 0 + + +def test_default_method_falls_back_without_numba(monkeypatch): + """Default construction should work in minimal scipy/numpy installs.""" + import sparse_convolution.sparse_convolution as sc_module + + monkeypatch.setattr(sc_module, "HAS_NUMBA", False) + + x = np.array([[1.0, 0.0], [0.0, 2.0]]) + k = np.array([[0.5]]) + + conv = Toeplitz_convolution2d(x_shape=x.shape, k=k, mode="same") + out = conv(x, batching=False) + + assert conv.method == "gather_scatter" + assert conv.backend == "numpy" + assert np.allclose(out, scipy.signal.convolve2d(x, k, mode="same")) From d7156f97f1ba9b19303514583b3c1526c752e54e Mon Sep 17 00:00:00 2001 From: Richard Hakim Date: Tue, 12 May 2026 14:07:00 -0400 Subject: [PATCH 2/4] Install numba in CI test job --- .github/workflows/build.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b83666e..bb5c7d6 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -136,8 +136,10 @@ jobs: - name: Run pytest and generate coverage report run: | # pip install tox tox-gh-actions - pip install pytest pytest-cov - python -m pytest --capture=tee-sys --cov=sparse_convolution --cov-report=xml:coverage.xml --color=yes + # Install numba so CI exercises the optional numba backends. + # Torch is intentionally omitted because it is not required for this CI job. + python -m pip install pytest pytest-cov numba + python -m pytest --capture=tee-sys -rs --cov=sparse_convolution --cov-report=xml:coverage.xml --color=yes - name: Upload coverage reports to Codecov uses: codecov/codecov-action@v4 ## this is a public action recognized by GitHub Actions From f7cbd0f6062389462544677aea1aaefb74f701cf Mon Sep 17 00:00:00 2001 From: Richard Hakim Date: Tue, 12 May 2026 14:12:08 -0400 Subject: [PATCH 3/4] Run build workflow on pull requests --- .github/workflows/build.yml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index bb5c7d6..29edbed 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -3,12 +3,9 @@ name: build on: push: branches: [ "main" ] - # branches: [ "dev" ] - # branches: [ "main", "dev" ] - # pull_request: - # branches: [ "main" ] - # branches: [ "dev" ] - # branches: [ "main", "dev" ] + pull_request: + branches: [ "main" ] + types: [opened, synchronize, reopened, ready_for_review] workflow_dispatch: inputs: name: From deae5107f0d0f1fb6db86be441311e4f917ebcec Mon Sep 17 00:00:00 2001 From: Richard Hakim Date: Tue, 12 May 2026 14:23:41 -0400 Subject: [PATCH 4/4] Update CI matrix Python and runner versions --- .github/workflows/build.yml | 34 +++++++++------------------------- 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 29edbed..d7d6874 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -20,33 +20,22 @@ jobs: build: - name: ${{ matrix.platform }}, py${{ matrix.python-version }}, ${{ matrix.install-level }} + name: ${{ matrix.platform }}, py${{ matrix.python-version }} runs-on: ${{ matrix.platform }} strategy: fail-fast: false matrix: platform: [ - # ubuntu-latest, - ubuntu-24.04, - ubuntu-22.04, - ubuntu-20.04, - # # windows-latest, - windows-2022, - windows-2019, - # # macos-latest, - macos-14, - # macos-11.0, - # macos-10.15, + ubuntu-latest, + windows-latest, + macos-latest, ] - python-version: [ - # "3.9", + python-version: [ "3.10", "3.11", "3.12", - ] - install-level: [ - system, - user, + "3.13", + "3.14", ] steps: @@ -115,15 +104,10 @@ jobs: uname -a - - name: Install package with pip dependencies -- system-level - if: matrix.install-level == 'system' + - name: Install package with pip dependencies run: | ## install dependencies with optional extras - pip install -v -e . - - name: Install package with pip dependencies -- user-level - if: matrix.install-level == 'user' - run: | - pip install -v -e . --user + python -m pip install -v -e . - name: Check installed packages