From f7bb667abb579be47cbfd9b52a0c48553a6c448c Mon Sep 17 00:00:00 2001 From: Andrey Isaev Date: Tue, 27 Jan 2026 10:49:03 +0300 Subject: [PATCH 1/9] X and raw/X format validation --- pyproject.toml | 1 + src/cap_upload_validator/errors.py | 13 ++++ src/cap_upload_validator/upload_validator.py | 47 +++++++++++++++ test/test_upload_validator.py | 63 ++++++++++++++++++++ 4 files changed, 124 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index e5edc4d..5f7fdd5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ dependencies = [ "numpy>=2.0.2", "pandas>=2.2.3", "scipy>=1.13.1", + "h5py==3.15.1", ] [project.urls] diff --git a/src/cap_upload_validator/errors.py b/src/cap_upload_validator/errors.py index c77e7a1..f34c3f6 100644 --- a/src/cap_upload_validator/errors.py +++ b/src/cap_upload_validator/errors.py @@ -113,3 +113,16 @@ class AnnDataNonStandardVarError(CapException): If there are other species you wish to upload to CAP, please contact support@celltype.info and we will work to accommodate your request. """ + +class CSCMatrixInX(CapException): + name = "CSCMatrixInX" + + def __init__(self, location: str): + """ + location: 'X', 'raw.X', or 'X and raw.X' + """ + super().__init__() + self.message = ( + f"The CSC matrix is found in {location}. " + "Gene expression matrix must be stored in CSR or dense format!" + ) diff --git a/src/cap_upload_validator/upload_validator.py b/src/cap_upload_validator/upload_validator.py index a3eb219..8b4e682 100644 --- a/src/cap_upload_validator/upload_validator.py +++ b/src/cap_upload_validator/upload_validator.py @@ -1,5 +1,6 @@ import pandas as pd import numpy as np +import h5py from scipy.sparse import issparse from cap_anndata import CapAnnData, read_h5ad import logging @@ -22,6 +23,7 @@ AnnDataNonStandardVarError, BadAnnDataFile, AnnDataNoneInGeneralMetadata, + CSCMatrixInX, ) from typing import Optional @@ -68,6 +70,7 @@ def validate(self, report_success: bool = True) -> None: if cap_adata.raw is not None: cap_adata.raw.read_var(columns=[]) + self._validate_x_and_raw_x_formats(self._adata_path) self._check_X(cap_adata) self._check_obsm(cap_adata) self._check_obs(cap_adata) @@ -287,3 +290,47 @@ def _remove_gene_version(ensemble_ids: pd.Index) -> pd.Index: clean_index = ensemble_ids.to_series().apply(lambda x: x.split(".")[0]) clean_index = pd.Index(clean_index) return clean_index + + def _is_csc(self, group_or_dataset) -> bool: + """ + Returns True if HDF5 object represents a CSC sparse matrix. + """ + if not isinstance(group_or_dataset, h5py.Group): + return False + + encoding = group_or_dataset.attrs.get("encoding-type", None) + return encoding == "csc_matrix" + + def _is_csr(self, group_or_dataset) -> bool: + if not isinstance(group_or_dataset, h5py.Group): + return False + return group_or_dataset.attrs.get("encoding-type", None) == "csr_matrix" + + def _is_dense(self, group_or_dataset) -> bool: + return isinstance(group_or_dataset, h5py.Dataset) + + def _validate_x_and_raw_x_formats(self, h5ad_path: str) -> None: + """ + Validate that X and raw.X (if exists) are dense or CSR. + Raise CSCMatrixInX otherwise. + """ + locations = [] + + with h5py.File(h5ad_path, "r") as f: + # X + x = f["X"] + if self._is_csc(x): + locations.append("X") + elif not (self._is_dense(x) or self._is_csr(x)): + locations.append("X") + + # raw.X + if "raw" in f and "X" in f["raw"]: + raw_x = f["raw/X"] + if self._is_csc(raw_x): + locations.append("raw.X") + elif not (self._is_dense(raw_x) or self._is_csr(raw_x)): + locations.append("raw.X") + + if locations: + raise CSCMatrixInX(location=" and ".join(locations)) diff --git a/test/test_upload_validator.py b/test/test_upload_validator.py index 2725427..6df6c17 100644 --- a/test/test_upload_validator.py +++ b/test/test_upload_validator.py @@ -1,4 +1,5 @@ import pytest +import h5py import numpy as np import pandas as pd import anndata as ad @@ -27,6 +28,7 @@ AnnDataNonStandardVarError, CapMultiException, AnnDataNoneInGeneralMetadata, + CSCMatrixInX, ) TMP_DIR = Path(tempfile.mkdtemp()) @@ -262,3 +264,64 @@ def test_ontology_id_instead_general_metadata(names_provided, with_none): with context: adata.read_obs(GENERAL_METADATA) v._check_obs(adata) + + +def write_sparse_group(f, path, encoding): + g = f.create_group(path) + g.attrs["encoding-type"] = encoding + g.attrs["encoding-version"] = "0.1.0" + g.attrs["shape"] = (5, 5) + g.create_dataset("data", data=np.array([1, 2])) + g.create_dataset("indices", data=np.array([0, 1])) + g.create_dataset("indptr", data=np.array([0, 1, 2])) + + +def test_csc_in_x_raises(tmp_path): + p = tmp_path / "test.h5ad" + with h5py.File(p, "w") as f: + write_sparse_group(f, "X", "csc_matrix") + + with pytest.raises(CSCMatrixInX) as e: + v = UploadValidator(p) + v._validate_x_and_raw_x_formats(p) + + assert "X" in e.value.message + + +def test_csc_in_raw_x_raises(tmp_path): + p = tmp_path / "test.h5ad" + with h5py.File(p, "w") as f: + write_sparse_group(f, "X", "csr_matrix") + raw = f.create_group("raw") + write_sparse_group(f, "raw/X", "csc_matrix") + + with pytest.raises(CSCMatrixInX) as e: + v = UploadValidator(p) + v._validate_x_and_raw_x_formats(p) + + assert "raw.X" in e.value.message + + +def test_csc_in_both_raises(tmp_path): + p = tmp_path / "test.h5ad" + with h5py.File(p, "w") as f: + write_sparse_group(f, "X", "csc_matrix") + raw = f.create_group("raw") + write_sparse_group(f, "raw/X", "csc_matrix") + + with pytest.raises(CSCMatrixInX) as e: + v = UploadValidator(p) + v._validate_x_and_raw_x_formats(p) + + assert "X and raw.X" in e.value.message + + +def test_dense_and_csr_pass(tmp_path): + p = tmp_path / "test.h5ad" + with h5py.File(p, "w") as f: + f.create_dataset("X", data=np.random.rand(5, 5)) + raw = f.create_group("raw") + write_sparse_group(f, "raw/X", "csr_matrix") + + v = UploadValidator(p) + v._validate_x_and_raw_x_formats(p) # should not raise exception From fe66e2dc907fedb455dc6e763fde9a402b0ae826 Mon Sep 17 00:00:00 2001 From: Andrey Isaev Date: Tue, 27 Jan 2026 15:13:50 +0300 Subject: [PATCH 2/9] cap_adata usage --- pyproject.toml | 1 - src/cap_upload_validator/upload_validator.py | 37 ++++++----- test/test_upload_validator.py | 69 ++++++++++++-------- 3 files changed, 59 insertions(+), 48 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5f7fdd5..e5edc4d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,6 @@ dependencies = [ "numpy>=2.0.2", "pandas>=2.2.3", "scipy>=1.13.1", - "h5py==3.15.1", ] [project.urls] diff --git a/src/cap_upload_validator/upload_validator.py b/src/cap_upload_validator/upload_validator.py index 8b4e682..4315268 100644 --- a/src/cap_upload_validator/upload_validator.py +++ b/src/cap_upload_validator/upload_validator.py @@ -70,7 +70,7 @@ def validate(self, report_success: bool = True) -> None: if cap_adata.raw is not None: cap_adata.raw.read_var(columns=[]) - self._validate_x_and_raw_x_formats(self._adata_path) + self._validate_x_and_raw_x_formats(cap_adata) self._check_X(cap_adata) self._check_obsm(cap_adata) self._check_obs(cap_adata) @@ -309,28 +309,29 @@ def _is_csr(self, group_or_dataset) -> bool: def _is_dense(self, group_or_dataset) -> bool: return isinstance(group_or_dataset, h5py.Dataset) - def _validate_x_and_raw_x_formats(self, h5ad_path: str) -> None: + def _validate_x_and_raw_x_formats(self, cap_adata: CapAnnData) -> None: """ Validate that X and raw.X (if exists) are dense or CSR. Raise CSCMatrixInX otherwise. """ locations = [] - - with h5py.File(h5ad_path, "r") as f: - # X - x = f["X"] - if self._is_csc(x): - locations.append("X") - elif not (self._is_dense(x) or self._is_csr(x)): - locations.append("X") - - # raw.X - if "raw" in f and "X" in f["raw"]: - raw_x = f["raw/X"] - if self._is_csc(raw_x): - locations.append("raw.X") - elif not (self._is_dense(raw_x) or self._is_csr(raw_x)): - locations.append("raw.X") + + f = cap_adata.file + + # X + x = f["X"] + if self._is_csc(x): + locations.append("X") + elif not (self._is_dense(x) or self._is_csr(x)): + locations.append("X") + + # raw.X + if "raw" in f and "X" in f["raw"]: + raw_x = f["raw/X"] + if self._is_csc(raw_x): + locations.append("raw.X") + elif not (self._is_dense(raw_x) or self._is_csr(raw_x)): + locations.append("raw.X") if locations: raise CSCMatrixInX(location=" and ".join(locations)) diff --git a/test/test_upload_validator.py b/test/test_upload_validator.py index 6df6c17..a872cf7 100644 --- a/test/test_upload_validator.py +++ b/test/test_upload_validator.py @@ -266,62 +266,73 @@ def test_ontology_id_instead_general_metadata(names_provided, with_none): v._check_obs(adata) -def write_sparse_group(f, path, encoding): - g = f.create_group(path) - g.attrs["encoding-type"] = encoding - g.attrs["encoding-version"] = "0.1.0" - g.attrs["shape"] = (5, 5) - g.create_dataset("data", data=np.array([1, 2])) - g.create_dataset("indices", data=np.array([0, 1])) - g.create_dataset("indptr", data=np.array([0, 1, 2])) +def write_adata_with_matrix(path, X, raw_X=None): + adata = ad.AnnData(X=X) + + if raw_X is not None: + adata.raw = ad.AnnData(X=raw_X) + + adata.write_h5ad(path) def test_csc_in_x_raises(tmp_path): p = tmp_path / "test.h5ad" - with h5py.File(p, "w") as f: - write_sparse_group(f, "X", "csc_matrix") + + X = sp.csc_matrix(np.eye(5)) # CSC + write_adata_with_matrix(p, X=X) + + v = UploadValidator(p) with pytest.raises(CSCMatrixInX) as e: - v = UploadValidator(p) - v._validate_x_and_raw_x_formats(p) + with read_h5ad(p, edit=False) as cap_adata: + v._validate_x_and_raw_x_formats(cap_adata) assert "X" in e.value.message def test_csc_in_raw_x_raises(tmp_path): p = tmp_path / "test.h5ad" - with h5py.File(p, "w") as f: - write_sparse_group(f, "X", "csr_matrix") - raw = f.create_group("raw") - write_sparse_group(f, "raw/X", "csc_matrix") + + X = sp.csr_matrix(np.eye(5)) # valid CSR + raw_X = sp.csc_matrix(np.eye(5)) # invalid CSC + + write_adata_with_matrix(p, X=X, raw_X=raw_X) + + v = UploadValidator(p) with pytest.raises(CSCMatrixInX) as e: - v = UploadValidator(p) - v._validate_x_and_raw_x_formats(p) + with read_h5ad(p, edit=False) as cap_adata: + v._validate_x_and_raw_x_formats(cap_adata) assert "raw.X" in e.value.message def test_csc_in_both_raises(tmp_path): p = tmp_path / "test.h5ad" - with h5py.File(p, "w") as f: - write_sparse_group(f, "X", "csc_matrix") - raw = f.create_group("raw") - write_sparse_group(f, "raw/X", "csc_matrix") + + X = sp.csc_matrix(np.eye(5)) + raw_X = sp.csc_matrix(np.eye(5)) + + write_adata_with_matrix(p, X=X, raw_X=raw_X) + + v = UploadValidator(p) with pytest.raises(CSCMatrixInX) as e: - v = UploadValidator(p) - v._validate_x_and_raw_x_formats(p) + with read_h5ad(p, edit=False) as cap_adata: + v._validate_x_and_raw_x_formats(cap_adata) assert "X and raw.X" in e.value.message def test_dense_and_csr_pass(tmp_path): p = tmp_path / "test.h5ad" - with h5py.File(p, "w") as f: - f.create_dataset("X", data=np.random.rand(5, 5)) - raw = f.create_group("raw") - write_sparse_group(f, "raw/X", "csr_matrix") + + X = np.random.rand(5, 5) # valid dense + raw_X = sp.csr_matrix(np.eye(5)) # valid CSR + + write_adata_with_matrix(p, X=X, raw_X=raw_X) v = UploadValidator(p) - v._validate_x_and_raw_x_formats(p) # should not raise exception + + with read_h5ad(p, edit=False) as cap_adata: + v._validate_x_and_raw_x_formats(cap_adata) # should not raise From 4e335030c0abe97c645acc5e15931b86adb7cd3e Mon Sep 17 00:00:00 2001 From: Andrey Isaev Date: Tue, 27 Jan 2026 15:31:44 +0300 Subject: [PATCH 3/9] minor fix --- src/cap_upload_validator/upload_validator.py | 9 ++++----- test/test_upload_validator.py | 1 - 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/cap_upload_validator/upload_validator.py b/src/cap_upload_validator/upload_validator.py index 4315268..fdd3552 100644 --- a/src/cap_upload_validator/upload_validator.py +++ b/src/cap_upload_validator/upload_validator.py @@ -1,10 +1,9 @@ import pandas as pd import numpy as np -import h5py from scipy.sparse import issparse from cap_anndata import CapAnnData, read_h5ad import logging -from h5py import Dataset +from h5py import Dataset, Group from .gene_mapping import ( GeneMap, @@ -295,19 +294,19 @@ def _is_csc(self, group_or_dataset) -> bool: """ Returns True if HDF5 object represents a CSC sparse matrix. """ - if not isinstance(group_or_dataset, h5py.Group): + if not isinstance(group_or_dataset, Group): return False encoding = group_or_dataset.attrs.get("encoding-type", None) return encoding == "csc_matrix" def _is_csr(self, group_or_dataset) -> bool: - if not isinstance(group_or_dataset, h5py.Group): + if not isinstance(group_or_dataset, Group): return False return group_or_dataset.attrs.get("encoding-type", None) == "csr_matrix" def _is_dense(self, group_or_dataset) -> bool: - return isinstance(group_or_dataset, h5py.Dataset) + return isinstance(group_or_dataset, Dataset) def _validate_x_and_raw_x_formats(self, cap_adata: CapAnnData) -> None: """ diff --git a/test/test_upload_validator.py b/test/test_upload_validator.py index a872cf7..566b4ae 100644 --- a/test/test_upload_validator.py +++ b/test/test_upload_validator.py @@ -1,5 +1,4 @@ import pytest -import h5py import numpy as np import pandas as pd import anndata as ad From 9d756773ee78a78e5d326421a539b5d1cf65d124 Mon Sep 17 00:00:00 2001 From: Andrey Isaev Date: Mon, 2 Feb 2026 13:18:17 +0300 Subject: [PATCH 4/9] cap-anndata>=0.5.1 --- pyproject.toml | 2 +- uv.lock | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e5edc4d..08f585b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ classifiers = [ license = "BSD-3-Clause" license-files = ["LICEN[CS]E*"] dependencies = [ - "cap-anndata>=0.5.0", + "cap-anndata>=0.5.1", "numpy>=2.0.2", "pandas>=2.2.3", "scipy>=1.13.1", diff --git a/uv.lock b/uv.lock index 6b3bef6..e3be6cd 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.9" resolution-markers = [ "python_full_version >= '3.12'", @@ -65,7 +65,7 @@ wheels = [ [[package]] name = "cap-anndata" -version = "0.5.0" +version = "0.5.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anndata", version = "0.10.9", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, @@ -74,14 +74,14 @@ dependencies = [ { name = "numpy", version = "2.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "pandas" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/12/b8/78a21f27e2eaa8c28a6855b2b6509ee3765139fede84042147f553e8a190/cap_anndata-0.5.0.tar.gz", hash = "sha256:8b288c1c948e068979eede4aa87b4375e91ba68ead36598a810e8e15077f580c", size = 16563, upload-time = "2025-05-30T13:06:23.545Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8b/15/a394e26dd1c8b072c74cdb641ef8f6365fbd44fe4c42bd8079c94e14c855/cap_anndata-0.5.1.tar.gz", hash = "sha256:e63160b8d03461e2e902923d86d19f0782f6f69975f5d45cea95c0f15e8a29b5", size = 16607, upload-time = "2026-02-02T09:20:21.909Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f0/0d/823f21cafd2445301fe24a6524a4d71f0597047853d759add77d3b05914a/cap_anndata-0.5.0-py3-none-any.whl", hash = "sha256:2e634dcf13c4eecbacfacd76088dd149ce2eaa6a69cecda9032e1d29d2aa067f", size = 10551, upload-time = "2025-05-30T13:06:22.443Z" }, + { url = "https://files.pythonhosted.org/packages/db/f0/60d4b716e1988f93bc05d79b4dcf74442b4524ff373f05a645ce2a725719/cap_anndata-0.5.1-py3-none-any.whl", hash = "sha256:c5606c95859e3ad6eb2d1fc41aba444e61aa10670bf676abe205bc9508351bb9", size = 10593, upload-time = "2026-02-02T09:20:20.441Z" }, ] [[package]] name = "cap-upload-validator" -version = "1.3.1" +version = "1.5.1" source = { editable = "." } dependencies = [ { name = "cap-anndata" }, @@ -94,7 +94,7 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "cap-anndata", specifier = ">=0.5.0" }, + { name = "cap-anndata", specifier = ">=0.5.1" }, { name = "numpy", specifier = ">=2.0.2" }, { name = "pandas", specifier = ">=2.2.3" }, { name = "scipy", specifier = ">=1.13.1" }, From 1fcc0aa3757885702e888c0cc4812b79410e3230 Mon Sep 17 00:00:00 2001 From: Andrey Isaev Date: Mon, 2 Feb 2026 13:43:15 +0300 Subject: [PATCH 5/9] cap-anndata>=0.5.2 --- pyproject.toml | 2 +- uv.lock | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 08f585b..34619c2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ classifiers = [ license = "BSD-3-Clause" license-files = ["LICEN[CS]E*"] dependencies = [ - "cap-anndata>=0.5.1", + "cap-anndata>=0.5.2", "numpy>=2.0.2", "pandas>=2.2.3", "scipy>=1.13.1", diff --git a/uv.lock b/uv.lock index e3be6cd..68b347a 100644 --- a/uv.lock +++ b/uv.lock @@ -65,7 +65,7 @@ wheels = [ [[package]] name = "cap-anndata" -version = "0.5.1" +version = "0.5.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anndata", version = "0.10.9", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, @@ -74,9 +74,9 @@ dependencies = [ { name = "numpy", version = "2.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "pandas" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8b/15/a394e26dd1c8b072c74cdb641ef8f6365fbd44fe4c42bd8079c94e14c855/cap_anndata-0.5.1.tar.gz", hash = "sha256:e63160b8d03461e2e902923d86d19f0782f6f69975f5d45cea95c0f15e8a29b5", size = 16607, upload-time = "2026-02-02T09:20:21.909Z" } +sdist = { url = "https://files.pythonhosted.org/packages/6f/41/f0ca38348b2f5be0c5ceaa524a672327dafa10fa0aaaf9cc4ae56fcd851e/cap_anndata-0.5.2.tar.gz", hash = "sha256:dff2d52fd9255eebd3c47982d0eb40036e8ff868585a5e688532d3c7cf2b9c79", size = 16584, upload-time = "2026-02-02T10:31:20.717Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/db/f0/60d4b716e1988f93bc05d79b4dcf74442b4524ff373f05a645ce2a725719/cap_anndata-0.5.1-py3-none-any.whl", hash = "sha256:c5606c95859e3ad6eb2d1fc41aba444e61aa10670bf676abe205bc9508351bb9", size = 10593, upload-time = "2026-02-02T09:20:20.441Z" }, + { url = "https://files.pythonhosted.org/packages/b1/3f/5f6f3b061fbd6b0af505fa5069f800560a1e656c6ac99aea24ea34e27627/cap_anndata-0.5.2-py3-none-any.whl", hash = "sha256:e5da884cc3a884c40c64dc105da44ec1969e60b6e5a2fd8b8444b229bb1180b7", size = 10584, upload-time = "2026-02-02T10:31:19.183Z" }, ] [[package]] @@ -94,7 +94,7 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "cap-anndata", specifier = ">=0.5.1" }, + { name = "cap-anndata", specifier = ">=0.5.2" }, { name = "numpy", specifier = ">=2.0.2" }, { name = "pandas", specifier = ">=2.2.3" }, { name = "scipy", specifier = ">=1.13.1" }, From d1e0d54b010ebfdb411dc9c574156b314109ce8f Mon Sep 17 00:00:00 2001 From: Andrey Isaev Date: Mon, 2 Feb 2026 13:50:00 +0300 Subject: [PATCH 6/9] Update test_upload_validator.py --- test/test_upload_validator.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/test_upload_validator.py b/test/test_upload_validator.py index 566b4ae..15485bd 100644 --- a/test/test_upload_validator.py +++ b/test/test_upload_validator.py @@ -1,7 +1,10 @@ import pytest import numpy as np import pandas as pd + import anndata as ad +ad.settings.allow_write_nullable_strings = True + import scipy.sparse as sp from pathlib import Path import tempfile From 3b5e9bc40441794c0968afe6bdcefed4d8b023fc Mon Sep 17 00:00:00 2001 From: Andrey Isaev Date: Mon, 2 Feb 2026 13:52:07 +0300 Subject: [PATCH 7/9] Update test_upload_validator.py --- test/test_upload_validator.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/test_upload_validator.py b/test/test_upload_validator.py index 15485bd..3d9c104 100644 --- a/test/test_upload_validator.py +++ b/test/test_upload_validator.py @@ -3,7 +3,9 @@ import pandas as pd import anndata as ad -ad.settings.allow_write_nullable_strings = True +from packaging import version +if version.parse(ad.__version__) >= version.parse("0.11.0"): + ad.settings.allow_write_nullable_strings = True import scipy.sparse as sp from pathlib import Path From 7dd3540d8d5394e36acb71154b74eac3c08508b6 Mon Sep 17 00:00:00 2001 From: Andrey Isaev Date: Mon, 2 Feb 2026 14:10:20 +0300 Subject: [PATCH 8/9] minor refactoring --- src/cap_upload_validator/errors.py | 7 ++++--- src/cap_upload_validator/upload_validator.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/cap_upload_validator/errors.py b/src/cap_upload_validator/errors.py index f34c3f6..b6260d3 100644 --- a/src/cap_upload_validator/errors.py +++ b/src/cap_upload_validator/errors.py @@ -117,12 +117,13 @@ class AnnDataNonStandardVarError(CapException): class CSCMatrixInX(CapException): name = "CSCMatrixInX" - def __init__(self, location: str): + def __init__(self, locations: list[str]): """ - location: 'X', 'raw.X', or 'X and raw.X' + locations: list of matrix locations, e.g. ['X'], ['raw.X'], or ['X', 'raw.X'] """ super().__init__() + self.locations = locations self.message = ( - f"The CSC matrix is found in {location}. " + f"The CSC matrix is found in {', '.join(locations)}. " "Gene expression matrix must be stored in CSR or dense format!" ) diff --git a/src/cap_upload_validator/upload_validator.py b/src/cap_upload_validator/upload_validator.py index fdd3552..b425158 100644 --- a/src/cap_upload_validator/upload_validator.py +++ b/src/cap_upload_validator/upload_validator.py @@ -333,4 +333,4 @@ def _validate_x_and_raw_x_formats(self, cap_adata: CapAnnData) -> None: locations.append("raw.X") if locations: - raise CSCMatrixInX(location=" and ".join(locations)) + raise CSCMatrixInX(locations=locations) From f34139e8c59172f30a0f3ba639a0dbe94085310c Mon Sep 17 00:00:00 2001 From: Andrey Isaev Date: Mon, 2 Feb 2026 14:15:13 +0300 Subject: [PATCH 9/9] Update errors.py --- src/cap_upload_validator/errors.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/cap_upload_validator/errors.py b/src/cap_upload_validator/errors.py index b6260d3..b766819 100644 --- a/src/cap_upload_validator/errors.py +++ b/src/cap_upload_validator/errors.py @@ -123,7 +123,8 @@ def __init__(self, locations: list[str]): """ super().__init__() self.locations = locations + loc_str = " and ".join(locations) self.message = ( - f"The CSC matrix is found in {', '.join(locations)}. " + f"The CSC matrix is found in {loc_str}. " "Gene expression matrix must be stored in CSR or dense format!" )