diff --git a/.github/workflows/numpy.yml b/.github/workflows/test.yml
similarity index 65%
rename from .github/workflows/numpy.yml
rename to .github/workflows/test.yml
index 6ca4096e..f1a91300 100644
--- a/.github/workflows/numpy.yml
+++ b/.github/workflows/test.yml
@@ -1,4 +1,4 @@
-name: NumPy Array API
+name: Test Array API Strict
 
 on: [push, pull_request]
 
@@ -22,10 +22,12 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        python -m pip install numpy==1.26.2
+        python -m pip install array-api-strict
         python -m pip install -r requirements.txt
     - name: Run the test suite
       env:
-        ARRAY_API_TESTS_MODULE: numpy.array_api
+        ARRAY_API_TESTS_MODULE: array_api_strict
       run: |
-        pytest -v -rxXfE --ci --skips-file numpy-skips.txt
+        pytest -v -rxXfE --skips-file array-api-strict-skips.txt array_api_tests/
+        # We also have internal tests that aren't really necessary for adopters
+        pytest -v -rxXfE meta_tests/
diff --git a/README.md b/README.md
index 3050b9a3..63484bcb 100644
--- a/README.md
+++ b/README.md
@@ -138,9 +138,9 @@ issues](https://github.com/data-apis/array-api-tests/issues/) to us.
 
 ## Running on CI
 
-See our existing [GitHub Actions workflow for
-Numpy](https://github.com/data-apis/array-api-tests/blob/master/.github/workflows/numpy.yml)
-for an example of using the test suite on CI.
+See our existing [GitHub Actions workflow for `array-api-strict`](https://github.com/data-apis/array-api-tests/blob/master/.github/workflows/test.yml)
+for an example of using the test suite on CI. Note that [`array-api-strict`](https://github.com/data-apis/array-api-strict)
+is an implementation of the array API that uses NumPy under the hood.
 
 ### Releases
 
@@ -161,12 +161,6 @@ You can specify the API version to use when testing via the
 array module's `__array_api_version__` value, and if that attribute doesn't
 exist then we fallback to `"2021.12"`.
 
-#### CI flag
-
-Use the `--ci` flag to run only the primary and special cases tests. You can
-ignore the other test cases as they are redundant for the purposes of checking
-compliance.
-
 #### Data-dependent shapes
 
 Use the `--disable-data-dependent-shapes` flag to skip testing functions which have
diff --git a/array-api-strict-skips.txt b/array-api-strict-skips.txt
new file mode 100644
index 00000000..0f266590
--- /dev/null
+++ b/array-api-strict-skips.txt
@@ -0,0 +1,8 @@
+# Known special case issue in NumPy. Not worth working around here
+# https://github.com/numpy/numpy/issues/21213
+array_api_tests/test_special_cases.py::test_iop[__ipow__(x1_i is -infinity and x2_i > 0 and not (x2_i.is_integer() and x2_i % 2 == 1)) -> +infinity]
+array_api_tests/test_special_cases.py::test_iop[__ipow__(x1_i is -0 and x2_i > 0 and not (x2_i.is_integer() and x2_i % 2 == 1)) -> +0]
+
+# The test suite is incorrectly checking sums that have loss of significance
+# (https://github.com/data-apis/array-api-tests/issues/168)
+array_api_tests/test_statistical_functions.py::test_sum
\ No newline at end of file
diff --git a/array_api_tests/pytest_helpers.py b/array_api_tests/pytest_helpers.py
index ead9fc6e..0e1b4c8b 100644
--- a/array_api_tests/pytest_helpers.py
+++ b/array_api_tests/pytest_helpers.py
@@ -485,7 +485,7 @@ def assert_array_elements(
         >>> assert xp.all(out == x)
 
     """
-    # __tracebackhide__ = True
+    __tracebackhide__ = True
     dh.result_type(out.dtype, expected.dtype)  # sanity check
     assert_shape(func_name, out_shape=out.shape, expected=expected.shape, kw=kw)  # sanity check
     f_func = f"[{func_name}({fmt_kw(kw)})]"
diff --git a/array_api_tests/test_array_object.py b/array_api_tests/test_array_object.py
index 6d550f60..bc3e7276 100644
--- a/array_api_tests/test_array_object.py
+++ b/array_api_tests/test_array_object.py
@@ -16,8 +16,6 @@
 from . import xp as _xp
 from .typing import DataType, Index, Param, Scalar, ScalarType, Shape
 
-pytestmark = pytest.mark.ci
-
 
 def scalar_objects(
     dtype: DataType, shape: Shape
@@ -107,6 +105,7 @@ def test_getitem(shape, dtype, data):
         ph.assert_array_elements("__getitem__", out=out, expected=expected)
 
 
+@pytest.mark.unvectorized
 @given(
     shape=hh.shapes(),
     dtypes=hh.oneway_promotable_dtypes(dh.all_dtypes),
@@ -154,6 +153,7 @@ def test_setitem(shape, dtypes, data):
         )
 
 
+@pytest.mark.unvectorized
 @pytest.mark.data_dependent_shapes
 @given(hh.shapes(), st.data())
 def test_getitem_masking(shape, data):
@@ -199,6 +199,7 @@ def test_getitem_masking(shape, data):
                 )
 
 
+@pytest.mark.unvectorized
 @given(hh.shapes(), st.data())
 def test_setitem_masking(shape, data):
     x = data.draw(hh.arrays(xps.scalar_dtypes(), shape=shape), label="x")
diff --git a/array_api_tests/test_constants.py b/array_api_tests/test_constants.py
index 01bc5456..145a2736 100644
--- a/array_api_tests/test_constants.py
+++ b/array_api_tests/test_constants.py
@@ -7,8 +7,6 @@
 from . import xp
 from .typing import Array
 
-pytestmark = pytest.mark.ci
-
 
 def assert_scalar_float(name: str, c: Any):
     assert isinstance(c, SupportsFloat), f"{name}={c!r} does not look like a float"
diff --git a/array_api_tests/test_creation_functions.py b/array_api_tests/test_creation_functions.py
index 7df439f5..6bee0533 100644
--- a/array_api_tests/test_creation_functions.py
+++ b/array_api_tests/test_creation_functions.py
@@ -3,7 +3,6 @@
 from itertools import count
 from typing import Iterator, NamedTuple, Union
 
-import pytest
 from hypothesis import assume, given, note
 from hypothesis import strategies as st
 
@@ -15,8 +14,6 @@
 from . import xps
 from .typing import DataType, Scalar
 
-pytestmark = pytest.mark.ci
-
 
 class frange(NamedTuple):
     start: float
diff --git a/array_api_tests/test_data_type_functions.py b/array_api_tests/test_data_type_functions.py
index ebad915f..1fa8c3b6 100644
--- a/array_api_tests/test_data_type_functions.py
+++ b/array_api_tests/test_data_type_functions.py
@@ -14,8 +14,6 @@
 from . import xp as _xp
 from .typing import DataType
 
-pytestmark = pytest.mark.ci
-
 
 # TODO: test with complex dtypes
 def non_complex_dtypes():
diff --git a/array_api_tests/test_fft.py b/array_api_tests/test_fft.py
index f6ab5ce6..62b94396 100644
--- a/array_api_tests/test_fft.py
+++ b/array_api_tests/test_fft.py
@@ -17,7 +17,6 @@
 from . import xp
 
 pytestmark = [
-    pytest.mark.ci,
     pytest.mark.xp_extension("fft"),
     pytest.mark.min_version("2022.12"),
 ]
diff --git a/array_api_tests/test_has_names.py b/array_api_tests/test_has_names.py
index 53eb0965..8e934781 100644
--- a/array_api_tests/test_has_names.py
+++ b/array_api_tests/test_has_names.py
@@ -9,8 +9,6 @@
 from .stubs import (array_attributes, array_methods, category_to_funcs,
                     extension_to_funcs, EXTENSIONS)
 
-pytestmark = pytest.mark.ci
-
 has_name_params = []
 for ext, stubs in extension_to_funcs.items():
     for stub in stubs:
diff --git a/array_api_tests/test_indexing_functions.py b/array_api_tests/test_indexing_functions.py
index 9f2cf319..1fc9031e 100644
--- a/array_api_tests/test_indexing_functions.py
+++ b/array_api_tests/test_indexing_functions.py
@@ -9,9 +9,8 @@
 from . import shape_helpers as sh
 from . import xps
 
-pytestmark = pytest.mark.ci
-
 
+@pytest.mark.unvectorized
 @pytest.mark.min_version("2022.12")
 @given(
     x=hh.arrays(xps.scalar_dtypes(), hh.shapes(min_dims=1, min_side=1)),
diff --git a/array_api_tests/test_linalg.py b/array_api_tests/test_linalg.py
index a20792ae..1cd57000 100644
--- a/array_api_tests/test_linalg.py
+++ b/array_api_tests/test_linalg.py
@@ -43,8 +43,6 @@
 from . import _array_module as xp
 from ._array_module import linalg
 
-pytestmark = pytest.mark.ci
-
 def assert_equal(x, y, msg_extra=None):
     extra = '' if not msg_extra else f' ({msg_extra})'
     if x.dtype in dh.all_float_dtypes:
diff --git a/array_api_tests/test_manipulation_functions.py b/array_api_tests/test_manipulation_functions.py
index 8cbe7750..045b5153 100644
--- a/array_api_tests/test_manipulation_functions.py
+++ b/array_api_tests/test_manipulation_functions.py
@@ -14,8 +14,6 @@
 from . import xps
 from .typing import Array, Shape
 
-pytestmark = pytest.mark.ci
-
 MAX_SIDE = hh.MAX_ARRAY_SIZE // 64
 MAX_DIMS = min(hh.MAX_ARRAY_SIZE // MAX_SIDE, 32)  # NumPy only supports up to 32 dims
 
@@ -121,6 +119,7 @@ def test_concat(dtypes, base_shape, data):
                     )
 
 
+@pytest.mark.unvectorized
 @given(
     x=hh.arrays(dtype=xps.scalar_dtypes(), shape=shared_shapes()),
     axis=shared_shapes().flatmap(
@@ -149,6 +148,7 @@ def test_expand_dims(x, axis):
     )
 
 
+@pytest.mark.unvectorized
 @given(
     x=hh.arrays(
         dtype=xps.scalar_dtypes(), shape=hh.shapes(min_side=1).filter(lambda s: 1 in s)
@@ -186,6 +186,7 @@ def test_squeeze(x, data):
     assert_array_ndindex("squeeze", x, x_indices=sh.ndindex(x.shape), out=out, out_indices=sh.ndindex(out.shape))
 
 
+@pytest.mark.unvectorized
 @given(
     x=hh.arrays(dtype=xps.scalar_dtypes(), shape=hh.shapes()),
     data=st.data(),
@@ -210,6 +211,7 @@ def test_flip(x, data):
                              out_indices=reverse_indices, kw=kw)
 
 
+@pytest.mark.unvectorized
 @given(
     x=hh.arrays(dtype=xps.scalar_dtypes(), shape=shared_shapes(min_dims=1)),
     axes=shared_shapes(min_dims=1).flatmap(
@@ -250,6 +252,7 @@ def reshape_shapes(draw, shape):
     return tuple(rshape)
 
 
+@pytest.mark.unvectorized
 @pytest.mark.skip("flaky")  # TODO: fix!
 @given(
     x=hh.arrays(dtype=xps.scalar_dtypes(), shape=hh.shapes(max_side=MAX_SIDE)),
@@ -282,6 +285,7 @@ def roll_ndindex(shape: Shape, shifts: Tuple[int], axes: Tuple[int]) -> Iterator
         yield tuple((i + sh) % si for i, sh, si in zip(idx, all_shifts, shape))
 
 
+@pytest.mark.unvectorized
 @given(hh.arrays(dtype=xps.scalar_dtypes(), shape=shared_shapes()), st.data())
 def test_roll(x, data):
     shift_strat = st.integers(-hh.MAX_ARRAY_SIZE, hh.MAX_ARRAY_SIZE)
@@ -321,6 +325,7 @@ def test_roll(x, data):
         assert_array_ndindex("roll", x, x_indices=sh.ndindex(x.shape), out=out, out_indices=shifted_indices, kw=kw)
 
 
+@pytest.mark.unvectorized
 @given(
     shape=shared_shapes(min_dims=1),
     dtypes=hh.mutually_promotable_dtypes(None),
diff --git a/array_api_tests/test_operators_and_elementwise_functions.py b/array_api_tests/test_operators_and_elementwise_functions.py
index 5e7f717c..27144847 100644
--- a/array_api_tests/test_operators_and_elementwise_functions.py
+++ b/array_api_tests/test_operators_and_elementwise_functions.py
@@ -21,7 +21,8 @@
 from . import xps
 from .typing import Array, DataType, Param, Scalar, ScalarType, Shape
 
-pytestmark = pytest.mark.ci
+
+pytestmark = pytest.mark.unvectorized
 
 
 def all_integer_dtypes() -> st.SearchStrategy[DataType]:
@@ -457,7 +458,7 @@ class UnaryParamContext(NamedTuple):
 
     @property
     def id(self) -> str:
-        return f"{self.func_name}"
+        return self.func_name
 
     def __repr__(self):
         return f"UnaryParamContext(<{self.id}>)"
diff --git a/array_api_tests/test_searching_functions.py b/array_api_tests/test_searching_functions.py
index 987fbb5f..ee7d4e9b 100644
--- a/array_api_tests/test_searching_functions.py
+++ b/array_api_tests/test_searching_functions.py
@@ -11,7 +11,8 @@
 from . import shape_helpers as sh
 from . import xps
 
-pytestmark = pytest.mark.ci
+
+pytestmark = pytest.mark.unvectorized
 
 
 @given(
diff --git a/array_api_tests/test_set_functions.py b/array_api_tests/test_set_functions.py
index 2ad3d852..a94a9c2d 100644
--- a/array_api_tests/test_set_functions.py
+++ b/array_api_tests/test_set_functions.py
@@ -13,7 +13,7 @@
 from . import shape_helpers as sh
 from . import xps
 
-pytestmark = [pytest.mark.ci, pytest.mark.data_dependent_shapes]
+pytestmark = [pytest.mark.data_dependent_shapes, pytest.mark.unvectorized]
 
 
 @given(hh.arrays(dtype=xps.scalar_dtypes(), shape=hh.shapes(min_side=1)))
diff --git a/array_api_tests/test_signatures.py b/array_api_tests/test_signatures.py
index ed68e99f..dc760b8a 100644
--- a/array_api_tests/test_signatures.py
+++ b/array_api_tests/test_signatures.py
@@ -33,8 +33,6 @@ def squeeze(x, /, axis):
 from . import xp
 from .stubs import array_methods, category_to_funcs, extension_to_funcs, name_to_func
 
-pytestmark = pytest.mark.ci
-
 ParameterKind = Literal[
     Parameter.POSITIONAL_ONLY,
     Parameter.VAR_POSITIONAL,
diff --git a/array_api_tests/test_sorting_functions.py b/array_api_tests/test_sorting_functions.py
index df0610ee..4aecfbe7 100644
--- a/array_api_tests/test_sorting_functions.py
+++ b/array_api_tests/test_sorting_functions.py
@@ -14,8 +14,6 @@
 from . import xps
 from .typing import Scalar, Shape
 
-pytestmark = pytest.mark.ci
-
 
 def assert_scalar_in_set(
     func_name: str,
@@ -32,6 +30,7 @@ def assert_scalar_in_set(
 
 
 # TODO: Test with signed zeros and NaNs (and ignore them somehow)
+@pytest.mark.unvectorized
 @given(
     x=hh.arrays(
         dtype=xps.real_dtypes(),
@@ -91,6 +90,7 @@ def test_argsort(x, data):
                     )
 
 
+@pytest.mark.unvectorized
 # TODO: Test with signed zeros and NaNs (and ignore them somehow)
 @given(
     x=hh.arrays(
diff --git a/array_api_tests/test_special_cases.py b/array_api_tests/test_special_cases.py
index d6335293..bd088676 100644
--- a/array_api_tests/test_special_cases.py
+++ b/array_api_tests/test_special_cases.py
@@ -35,8 +35,6 @@
 from . import xp, xps
 from .stubs import category_to_funcs
 
-pytestmark = pytest.mark.ci
-
 UnaryCheck = Callable[[float], bool]
 BinaryCheck = Callable[[float, float], bool]
 
@@ -1212,6 +1210,7 @@ def parse_binary_case_block(case_block: str) -> List[BinaryCase]:
 assert len(iop_params) != 0
 
 
+@pytest.mark.unvectorized
 @pytest.mark.parametrize("func_name, func, case", unary_params)
 @given(
     x=hh.arrays(dtype=xps.floating_dtypes(), shape=hh.shapes(min_side=1)),
@@ -1250,6 +1249,7 @@ def test_unary(func_name, func, case, x, data):
 )
 
 
+@pytest.mark.unvectorized
 @pytest.mark.parametrize("func_name, func, case", binary_params)
 @given(x1=x1_strat, x2=x2_strat, data=st.data())
 def test_binary(func_name, func, case, x1, x2, data):
@@ -1294,6 +1294,7 @@ def test_binary(func_name, func, case, x1, x2, data):
     assume(good_example)
 
 
+@pytest.mark.unvectorized
 @pytest.mark.parametrize("iop_name, iop, case", iop_params)
 @given(
     oneway_dtypes=hh.oneway_promotable_dtypes(dh.real_float_dtypes),
diff --git a/array_api_tests/test_statistical_functions.py b/array_api_tests/test_statistical_functions.py
index e5e868a2..3cce37e0 100644
--- a/array_api_tests/test_statistical_functions.py
+++ b/array_api_tests/test_statistical_functions.py
@@ -15,8 +15,6 @@
 from ._array_module import _UndefinedStub
 from .typing import DataType
 
-pytestmark = pytest.mark.ci
-
 
 def kwarg_dtypes(dtype: DataType) -> st.SearchStrategy[Optional[DataType]]:
     dtypes = [d2 for d1, d2 in dh.promotion_table if d1 == dtype]
@@ -25,6 +23,7 @@ def kwarg_dtypes(dtype: DataType) -> st.SearchStrategy[Optional[DataType]]:
     return st.none() | st.sampled_from(dtypes)
 
 
+@pytest.mark.unvectorized
 @given(
     x=hh.arrays(
         dtype=xps.real_dtypes(),
@@ -77,6 +76,7 @@ def test_mean(x, data):
     # Values testing mean is too finicky
 
 
+@pytest.mark.unvectorized
 @given(
     x=hh.arrays(
         dtype=xps.real_dtypes(),
@@ -107,6 +107,7 @@ def test_min(x, data):
         ph.assert_scalar_equals("min", type_=scalar_type, idx=out_idx, out=min_, expected=expected)
 
 
+@pytest.mark.unvectorized
 @given(
     x=hh.arrays(
         dtype=xps.numeric_dtypes(),
@@ -195,6 +196,7 @@ def test_std(x, data):
     # We can't easily test the result(s) as standard deviation methods vary a lot
 
 
+@pytest.mark.unvectorized
 @pytest.mark.skip("flaky")  # TODO: fix!
 @given(
     x=hh.arrays(
@@ -247,6 +249,7 @@ def test_sum(x, data):
         ph.assert_scalar_equals("sum", type_=scalar_type, idx=out_idx, out=sum_, expected=expected)
 
 
+@pytest.mark.unvectorized
 @pytest.mark.skip(reason="flaky")  # TODO: fix!
 @given(
     x=hh.arrays(
diff --git a/array_api_tests/test_utility_functions.py b/array_api_tests/test_utility_functions.py
index 96da2bdd..e094cfb9 100644
--- a/array_api_tests/test_utility_functions.py
+++ b/array_api_tests/test_utility_functions.py
@@ -9,9 +9,8 @@
 from . import shape_helpers as sh
 from . import xps
 
-pytestmark = pytest.mark.ci
-
 
+@pytest.mark.unvectorized
 @given(
     x=hh.arrays(dtype=xps.scalar_dtypes(), shape=hh.shapes(min_side=1)),
     data=st.data(),
@@ -39,6 +38,7 @@ def test_all(x, data):
                                 out=result, expected=expected, kw=kw)
 
 
+@pytest.mark.unvectorized
 @given(
     x=hh.arrays(dtype=xps.scalar_dtypes(), shape=hh.shapes()),
     data=st.data(),
diff --git a/conftest.py b/conftest.py
index a5d07ad2..5e2b13dc 100644
--- a/conftest.py
+++ b/conftest.py
@@ -1,9 +1,12 @@
 from functools import lru_cache
 from pathlib import Path
+import argparse
+import math
 import warnings
 import os
 
 from hypothesis import settings
+from hypothesis.errors import InvalidArgument
 from pytest import mark
 
 from array_api_tests import _array_module as xp
@@ -54,11 +57,7 @@ def pytest_addoption(parser):
         help="disable testing functions with output shapes dependent on input",
     )
     # CI
-    parser.addoption(
-        "--ci",
-        action="store_true",
-        help="run just the tests appropriate for CI",
-    )
+    parser.addoption("--ci", action="store_true", help=argparse.SUPPRESS )  # deprecated
     parser.addoption(
         "--skips-file",
         action="store",
@@ -78,11 +77,14 @@ def pytest_configure(config):
     config.addinivalue_line(
         "markers", "data_dependent_shapes: output shapes are dependent on inputs"
     )
-    config.addinivalue_line("markers", "ci: primary test")
     config.addinivalue_line(
         "markers",
         "min_version(api_version): run when greater or equal to api_version",
     )
+    config.addinivalue_line(
+        "markers",
+        "unvectorized: asserts against values via element-wise iteration (not performative!)",
+    )
     # Hypothesis
     hypothesis_max_examples = config.getoption("--hypothesis-max-examples")
     disable_deadline = config.getoption("--hypothesis-disable-deadline")
@@ -99,6 +101,12 @@ def pytest_configure(config):
         settings.load_profile("xp_override")
     else:
         settings.load_profile("xp_default")
+    # CI
+    if config.getoption("--ci"):
+        warnings.warn(
+            "Custom pytest option --ci is deprecated as any tests not for CI "
+            "are now located in meta_tests/"
+        )
 
 
 @lru_cache
@@ -110,6 +118,9 @@ def xp_has_ext(ext: str) -> bool:
 
 
 def pytest_collection_modifyitems(config, items):
+    # 1. Prepare for iterating over items
+    # -----------------------------------
+
     skips_file = skips_path = config.getoption('--skips-file')
     if skips_file is None:
         skips_file = Path(__file__).parent / "skips.txt"
@@ -144,7 +155,10 @@ def pytest_collection_modifyitems(config, items):
 
     disabled_exts = config.getoption("--disable-extension")
     disabled_dds = config.getoption("--disable-data-dependent-shapes")
-    ci = config.getoption("--ci")
+    unvectorized_max_examples = math.ceil(math.log(config.getoption("--hypothesis-max-examples") or 50))
+
+    # 2. Iterate through items and apply markers accordingly
+    # ------------------------------------------------------
 
     for item in items:
         markers = list(item.iter_markers())
@@ -178,11 +192,6 @@ def pytest_collection_modifyitems(config, items):
                         mark.skip(reason="disabled via --disable-data-dependent-shapes")
                     )
                     break
-        # skip if test not appropriate for CI
-        if ci:
-            ci_mark = next((m for m in markers if m.name == "ci"), None)
-            if ci_mark is None:
-                item.add_marker(mark.skip(reason="disabled via --ci"))
         # skip if test is for greater api_version
         ver_mark = next((m for m in markers if m.name == "min_version"), None)
         if ver_mark is not None:
@@ -193,6 +202,21 @@ def pytest_collection_modifyitems(config, items):
                         reason=f"requires ARRAY_API_TESTS_VERSION=>{min_version}"
                     )
                 )
+        # reduce the max number of generated Hypothesis examples for unvectorized tests
+        if any(m.name == "unvectorized" for m in markers):
+            # TODO: limit generated examples when settings already applied
+            if not hasattr(item.obj, "_hypothesis_internal_settings_applied"):
+                try:
+                    item.obj = settings(max_examples=unvectorized_max_examples)(item.obj)
+                except InvalidArgument as e:
+                    warnings.warn(
+                        f"Tried decorating {item.name} with settings() but got "
+                        f"hypothesis.errors.InvalidArgument: {e}"
+                    )
+
+
+    # 3. Warn on bad skipped/xfailed ids
+    # ----------------------------------
 
     bad_ids_end_msg = (
         "Note the relevant tests might not of been collected by pytest, or "
@@ -215,4 +239,4 @@ def pytest_collection_modifyitems(config, items):
             f"{f_bad_ids}\n"
             f"(xfails file: {xfails_file})\n"
             f"{bad_ids_end_msg}"
-        )
+        )
\ No newline at end of file
diff --git a/meta_tests/README.md b/meta_tests/README.md
new file mode 100644
index 00000000..fb563cf6
--- /dev/null
+++ b/meta_tests/README.md
@@ -0,0 +1 @@
+Testing the utilities used in `array_api_tests/`
\ No newline at end of file
diff --git a/array_api_tests/meta/__init__.py b/meta_tests/__init__.py
similarity index 100%
rename from array_api_tests/meta/__init__.py
rename to meta_tests/__init__.py
diff --git a/array_api_tests/meta/test_array_helpers.py b/meta_tests/test_array_helpers.py
similarity index 83%
rename from array_api_tests/meta/test_array_helpers.py
rename to meta_tests/test_array_helpers.py
index 68f96910..99d83f40 100644
--- a/array_api_tests/meta/test_array_helpers.py
+++ b/meta_tests/test_array_helpers.py
@@ -1,5 +1,5 @@
-from .. import _array_module as xp
-from ..array_helpers import exactly_equal, notequal
+from array_api_tests import _array_module as xp
+from array_api_tests .array_helpers import exactly_equal, notequal
 
 # TODO: These meta-tests currently only work with NumPy
 
diff --git a/array_api_tests/meta/test_broadcasting.py b/meta_tests/test_broadcasting.py
similarity index 95%
rename from array_api_tests/meta/test_broadcasting.py
rename to meta_tests/test_broadcasting.py
index 72de61cf..2f6310c1 100644
--- a/array_api_tests/meta/test_broadcasting.py
+++ b/meta_tests/test_broadcasting.py
@@ -4,7 +4,7 @@
 
 import pytest
 
-from .. import shape_helpers as sh
+from array_api_tests import shape_helpers as sh
 
 
 @pytest.mark.parametrize(
diff --git a/array_api_tests/meta/test_equality_mapping.py b/meta_tests/test_equality_mapping.py
similarity index 93%
rename from array_api_tests/meta/test_equality_mapping.py
rename to meta_tests/test_equality_mapping.py
index 86fa7e14..8ac481f6 100644
--- a/array_api_tests/meta/test_equality_mapping.py
+++ b/meta_tests/test_equality_mapping.py
@@ -1,6 +1,6 @@
 import pytest
 
-from ..dtype_helpers import EqualityMapping
+from array_api_tests .dtype_helpers import EqualityMapping
 
 
 def test_raises_on_distinct_eq_key():
diff --git a/array_api_tests/meta/test_hypothesis_helpers.py b/meta_tests/test_hypothesis_helpers.py
similarity index 93%
rename from array_api_tests/meta/test_hypothesis_helpers.py
rename to meta_tests/test_hypothesis_helpers.py
index f63c009f..b14b728c 100644
--- a/array_api_tests/meta/test_hypothesis_helpers.py
+++ b/meta_tests/test_hypothesis_helpers.py
@@ -6,13 +6,13 @@
 from hypothesis import strategies as st
 from hypothesis.errors import Unsatisfiable
 
-from .. import _array_module as xp
-from .. import array_helpers as ah
-from .. import dtype_helpers as dh
-from .. import hypothesis_helpers as hh
-from .. import shape_helpers as sh
-from .. import xps
-from .._array_module import _UndefinedStub
+from array_api_tests import _array_module as xp
+from array_api_tests import array_helpers as ah
+from array_api_tests import dtype_helpers as dh
+from array_api_tests import hypothesis_helpers as hh
+from array_api_tests import shape_helpers as sh
+from array_api_tests import xps
+from array_api_tests ._array_module import _UndefinedStub
 
 UNDEFINED_DTYPES = any(isinstance(d, _UndefinedStub) for d in dh.all_dtypes)
 pytestmark = [pytest.mark.skipif(UNDEFINED_DTYPES, reason="undefined dtypes")]
diff --git a/array_api_tests/meta/test_linalg.py b/meta_tests/test_linalg.py
similarity index 63%
rename from array_api_tests/meta/test_linalg.py
rename to meta_tests/test_linalg.py
index a4171e81..82794b6c 100644
--- a/array_api_tests/meta/test_linalg.py
+++ b/meta_tests/test_linalg.py
@@ -2,9 +2,9 @@
 
 from hypothesis import given
 
-from ..hypothesis_helpers import symmetric_matrices
-from .. import array_helpers as ah
-from .. import _array_module as xp
+from array_api_tests .hypothesis_helpers import symmetric_matrices
+from array_api_tests import array_helpers as ah
+from array_api_tests import _array_module as xp
 
 @pytest.mark.xp_extension('linalg')
 @given(x=symmetric_matrices(finite=True))
diff --git a/array_api_tests/meta/test_partial_adopters.py b/meta_tests/test_partial_adopters.py
similarity index 68%
rename from array_api_tests/meta/test_partial_adopters.py
rename to meta_tests/test_partial_adopters.py
index 6eda5c89..de3a7e76 100644
--- a/array_api_tests/meta/test_partial_adopters.py
+++ b/meta_tests/test_partial_adopters.py
@@ -1,10 +1,10 @@
 import pytest
 from hypothesis import given
 
-from .. import dtype_helpers as dh
-from .. import hypothesis_helpers as hh
-from .. import _array_module as xp
-from .._array_module import _UndefinedStub
+from array_api_tests import dtype_helpers as dh
+from array_api_tests import hypothesis_helpers as hh
+from array_api_tests import _array_module as xp
+from array_api_tests ._array_module import _UndefinedStub
 
 
 # e.g. PyTorch only supports uint8 currently
diff --git a/array_api_tests/meta/test_pytest_helpers.py b/meta_tests/test_pytest_helpers.py
similarity index 64%
rename from array_api_tests/meta/test_pytest_helpers.py
rename to meta_tests/test_pytest_helpers.py
index a32c6f33..a0aa0930 100644
--- a/array_api_tests/meta/test_pytest_helpers.py
+++ b/meta_tests/test_pytest_helpers.py
@@ -1,7 +1,8 @@
 from pytest import raises
 
-from .. import _array_module as xp
-from .. import pytest_helpers as ph
+from array_api_tests import xp as _xp
+from array_api_tests import _array_module as xp
+from array_api_tests import pytest_helpers as ph
 
 
 def test_assert_dtype():
@@ -16,10 +17,12 @@ def test_assert_dtype():
 def test_assert_array_elements():
     ph.assert_array_elements("int zeros", out=xp.asarray(0), expected=xp.asarray(0))
     ph.assert_array_elements("pos zeros", out=xp.asarray(0.0), expected=xp.asarray(0.0))
-    with raises(AssertionError):
-        ph.assert_array_elements("mixed sign zeros", out=xp.asarray(0.0), expected=xp.asarray(-0.0))
-    with raises(AssertionError):
-        ph.assert_array_elements("mixed sign zeros", out=xp.asarray(-0.0), expected=xp.asarray(0.0))
+    ph.assert_array_elements("neg zeros", out=xp.asarray(-0.0), expected=xp.asarray(-0.0))
+    if hasattr(_xp, "signbit"):
+        with raises(AssertionError):
+            ph.assert_array_elements("mixed sign zeros", out=xp.asarray(0.0), expected=xp.asarray(-0.0))
+        with raises(AssertionError):
+            ph.assert_array_elements("mixed sign zeros", out=xp.asarray(-0.0), expected=xp.asarray(0.0))
 
     ph.assert_array_elements("nans", out=xp.asarray(float("nan")), expected=xp.asarray(float("nan")))
     with raises(AssertionError):
diff --git a/array_api_tests/meta/test_signatures.py b/meta_tests/test_signatures.py
similarity index 96%
rename from array_api_tests/meta/test_signatures.py
rename to meta_tests/test_signatures.py
index 2efe1881..937f73f3 100644
--- a/array_api_tests/meta/test_signatures.py
+++ b/meta_tests/test_signatures.py
@@ -2,7 +2,7 @@
 
 import pytest
 
-from ..test_signatures import _test_inspectable_func
+from array_api_tests .test_signatures import _test_inspectable_func
 
 
 def stub(foo, /, bar=None, *, baz=None):
diff --git a/array_api_tests/meta/test_special_cases.py b/meta_tests/test_special_cases.py
similarity index 75%
rename from array_api_tests/meta/test_special_cases.py
rename to meta_tests/test_special_cases.py
index 826e5969..40c7806c 100644
--- a/array_api_tests/meta/test_special_cases.py
+++ b/meta_tests/test_special_cases.py
@@ -1,6 +1,6 @@
 import math
 
-from ..test_special_cases import parse_result
+from array_api_tests .test_special_cases import parse_result
 
 
 def test_parse_result():
diff --git a/array_api_tests/meta/test_utils.py b/meta_tests/test_utils.py
similarity index 86%
rename from array_api_tests/meta/test_utils.py
rename to meta_tests/test_utils.py
index dbd99495..911ba899 100644
--- a/array_api_tests/meta/test_utils.py
+++ b/meta_tests/test_utils.py
@@ -2,14 +2,14 @@
 from hypothesis import given
 from hypothesis import strategies as st
 
-from .. import _array_module as xp
-from .. import dtype_helpers as dh
-from .. import hypothesis_helpers as hh
-from .. import shape_helpers as sh
-from .. import xps
-from ..test_creation_functions import frange
-from ..test_manipulation_functions import roll_ndindex
-from ..test_operators_and_elementwise_functions import mock_int_dtype
+from array_api_tests import _array_module as xp
+from array_api_tests import dtype_helpers as dh
+from array_api_tests import hypothesis_helpers as hh
+from array_api_tests import shape_helpers as sh
+from array_api_tests import xps
+from array_api_tests .test_creation_functions import frange
+from array_api_tests .test_manipulation_functions import roll_ndindex
+from array_api_tests .test_operators_and_elementwise_functions import mock_int_dtype
 
 
 @pytest.mark.parametrize(
diff --git a/numpy-skips.txt b/numpy-skips.txt
deleted file mode 100644
index aebc249d..00000000
--- a/numpy-skips.txt
+++ /dev/null
@@ -1,52 +0,0 @@
-# copy not implemented
-array_api_tests/test_creation_functions.py::test_asarray_arrays
-# https://github.com/numpy/numpy/issues/20870
-array_api_tests/test_data_type_functions.py::test_can_cast
-# The return dtype for trace is not consistent in the spec
-# https://github.com/data-apis/array-api/issues/202#issuecomment-952529197
-array_api_tests/test_linalg.py::test_trace
-# waiting on NumPy to allow/revert distinct NaNs for np.unique
-# https://github.com/numpy/numpy/issues/20326#issuecomment-1012380448
-array_api_tests/test_set_functions.py
-
-# newaxis not included in numpy namespace as of v1.26.2
-array_api_tests/test_constants.py::test_newaxis
-
-# linalg.solve issue in numpy.array_api as of v1.26.2 (see numpy#25146)
-array_api_tests/test_linalg.py::test_solve
-# numpy.array_api needs updating... or replaced on CI
-array_api_tests/test_linalg.py::test_cross
-
-# https://github.com/numpy/numpy/issues/21373
-array_api_tests/test_array_object.py::test_getitem
-
-# missing copy arg
-array_api_tests/test_signatures.py::test_func_signature[reshape]
-
-# does not (yet) raise an exception for zero-dimensional inputs to nonzero
-array_api_tests/test_searching_functions.py::test_nonzero_zerodim_error
-
-# https://github.com/numpy/numpy/issues/21211
-array_api_tests/test_special_cases.py::test_iop[__iadd__(x1_i is -0 and x2_i is -0) -> -0]
-# https://github.com/numpy/numpy/issues/21213
-array_api_tests/test_special_cases.py::test_iop[__ipow__(x1_i is -infinity and x2_i > 0 and not (x2_i.is_integer() and x2_i % 2 == 1)) -> +infinity]
-array_api_tests/test_special_cases.py::test_iop[__ipow__(x1_i is -0 and x2_i > 0 and not (x2_i.is_integer() and x2_i % 2 == 1)) -> +0]
-# noted diversions from spec
-array_api_tests/test_special_cases.py::test_binary[floor_divide(x1_i is +infinity and isfinite(x2_i) and x2_i > 0) -> +infinity]
-array_api_tests/test_special_cases.py::test_binary[floor_divide(x1_i is +infinity and isfinite(x2_i) and x2_i < 0) -> -infinity]
-array_api_tests/test_special_cases.py::test_binary[floor_divide(x1_i is -infinity and isfinite(x2_i) and x2_i > 0) -> -infinity]
-array_api_tests/test_special_cases.py::test_binary[floor_divide(x1_i is -infinity and isfinite(x2_i) and x2_i < 0) -> +infinity]
-array_api_tests/test_special_cases.py::test_binary[floor_divide(isfinite(x1_i) and x1_i > 0 and x2_i is -infinity) -> -0]
-array_api_tests/test_special_cases.py::test_binary[floor_divide(isfinite(x1_i) and x1_i < 0 and x2_i is +infinity) -> -0]
-array_api_tests/test_special_cases.py::test_binary[__floordiv__(x1_i is +infinity and isfinite(x2_i) and x2_i > 0) -> +infinity]
-array_api_tests/test_special_cases.py::test_binary[__floordiv__(x1_i is +infinity and isfinite(x2_i) and x2_i < 0) -> -infinity]
-array_api_tests/test_special_cases.py::test_binary[__floordiv__(x1_i is -infinity and isfinite(x2_i) and x2_i > 0) -> -infinity]
-array_api_tests/test_special_cases.py::test_binary[__floordiv__(x1_i is -infinity and isfinite(x2_i) and x2_i < 0) -> +infinity]
-array_api_tests/test_special_cases.py::test_binary[__floordiv__(isfinite(x1_i) and x1_i > 0 and x2_i is -infinity) -> -0]
-array_api_tests/test_special_cases.py::test_binary[__floordiv__(isfinite(x1_i) and x1_i < 0 and x2_i is +infinity) -> -0]
-array_api_tests/test_special_cases.py::test_iop[__ifloordiv__(x1_i is +infinity and isfinite(x2_i) and x2_i > 0) -> +infinity]
-array_api_tests/test_special_cases.py::test_iop[__ifloordiv__(x1_i is +infinity and isfinite(x2_i) and x2_i < 0) -> -infinity]
-array_api_tests/test_special_cases.py::test_iop[__ifloordiv__(x1_i is -infinity and isfinite(x2_i) and x2_i > 0) -> -infinity]
-array_api_tests/test_special_cases.py::test_iop[__ifloordiv__(x1_i is -infinity and isfinite(x2_i) and x2_i < 0) -> +infinity]
-array_api_tests/test_special_cases.py::test_iop[__ifloordiv__(isfinite(x1_i) and x1_i > 0 and x2_i is -infinity) -> -0]
-array_api_tests/test_special_cases.py::test_iop[__ifloordiv__(isfinite(x1_i) and x1_i < 0 and x2_i is +infinity) -> -0]