diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index bc7fe3cd6830..b0d526b1ee04 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -239,6 +239,11 @@ jobs:
- name: Test
shell: bash
run: ci/scripts/python_test.sh $(pwd) $(pwd)/build
+ - name: Test annotations
+ shell: bash
+ env:
+ PYARROW_TEST_ANNOTATIONS: "ON"
+ run: ci/scripts/python_test_type_annotations.sh $(pwd)/python
windows:
name: AMD64 Windows 2022 Python 3.13
@@ -296,3 +301,7 @@ jobs:
shell: cmd
run: |
call "ci\scripts\python_test.bat" %cd%
+ - name: Test annotations
+ shell: cmd
+ run: |
+ call "ci\scripts\python_test_type_annotations.bat" %cd%\python
diff --git a/ci/scripts/python_test_type_annotations.bat b/ci/scripts/python_test_type_annotations.bat
new file mode 100644
index 000000000000..3446e329a899
--- /dev/null
+++ b/ci/scripts/python_test_type_annotations.bat
@@ -0,0 +1,38 @@
+@rem Licensed to the Apache Software Foundation (ASF) under one
+@rem or more contributor license agreements. See the NOTICE file
+@rem distributed with this work for additional information
+@rem regarding copyright ownership. The ASF licenses this file
+@rem to you under the Apache License, Version 2.0 (the
+@rem "License"); you may not use this file except in compliance
+@rem with the License. You may obtain a copy of the License at
+@rem
+@rem http://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing,
+@rem software distributed under the License is distributed on an
+@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+@rem KIND, either express or implied. See the License for the
+@rem specific language governing permissions and limitations
+@rem under the License.
+
+@echo on
+
+@REM Usage: python_test_type_annotations.bat <directory to run the checkers in>
+set PYARROW_DIR=%1
+
+echo Annotation testing on Windows ...
+
+@REM Install library stubs
+%PYTHON_CMD% -m pip install pandas-stubs scipy-stubs sphinx types-cffi types-psutil types-requests types-python-dateutil || exit /B 1
+
+@REM Install other dependencies for type checking
+%PYTHON_CMD% -m pip install fsspec || exit /B 1
+
+@REM Install type checkers
+%PYTHON_CMD% -m pip install mypy pyright ty || exit /B 1
+
+@REM Run type checkers from the directory given as first argument.
+@REM A batch script reports only the status of its last command, so every
+@REM checker has to abort the script explicitly when it fails.
+pushd %PYARROW_DIR% || exit /B 1
+
+mypy || exit /B 1
+pyright || exit /B 1
+ty check || exit /B 1
+
+popd
diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat
index fc256d72785c..2021e2d41d38 100644
--- a/ci/scripts/python_wheel_windows_build.bat
+++ b/ci/scripts/python_wheel_windows_build.bat
@@ -135,6 +135,11 @@ pushd C:\arrow\python
+@REM Populate stub docstrings BEFORE building, so that the sdist and wheel
+@REM produced below contain them. The update script imports click, griffe
+@REM and libcst. Abort on any failure, like the other steps of this script.
+%PYTHON_CMD% setup.py build_ext --inplace || exit /B 1
+%PYTHON_CMD% -m pip install click griffe libcst || exit /B 1
+%PYTHON_CMD% ..\dev\update_stub_docstrings.py pyarrow-stubs || exit /B 1
+
 @REM Build wheel
 %PYTHON_CMD% -m build --sdist --wheel . --no-isolation || exit /B 1
@REM Repair the wheel with delvewheel
@REM
@REM Since we bundled the Arrow C++ libraries ourselves, we only need to
diff --git a/compose.yaml b/compose.yaml
index c799059fe254..87b79300011a 100644
--- a/compose.yaml
+++ b/compose.yaml
@@ -1539,8 +1539,7 @@ services:
/arrow/ci/scripts/python_build.sh /arrow /build &&
pip install -e /arrow/dev/archery[numpydoc] &&
archery numpydoc --allow-rule GL10,PR01,PR03,PR04,PR05,PR10,RT03,YD01 &&
- /arrow/ci/scripts/python_test.sh /arrow &&
- /arrow/ci/scripts/python_test_type_annotations.sh /arrow/python"]
+ /arrow/ci/scripts/python_test.sh /arrow"]
conda-python-dask:
# Possible $DASK parameters:
diff --git a/dev/update_stub_docstrings.py b/dev/update_stub_docstrings.py
new file mode 100644
index 000000000000..eaeb2a510eb5
--- /dev/null
+++ b/dev/update_stub_docstrings.py
@@ -0,0 +1,214 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Utility to extract docstrings from pyarrow and update
+# docstrings in stubfiles.
+#
+# Usage
+# =====
+#
+# python ../dev/update_stub_docstrings.py pyarrow-stubs
+
+
+from pathlib import Path
+from textwrap import indent
+
+import click
+# TODO: perhaps replace griffe with importlib
+import griffe
+from griffe import AliasResolutionError
+import libcst
+from libcst import matchers as m
+
+
+def _get_docstring(name, package, indentation):
+    """Return the docstring of ``name`` as the source of a string literal.
+
+    Parameters
+    ----------
+    name : str
+        Dotted member path looked up with ``package.get_member``.
+    package : object
+        Loaded package; only ``get_member`` and ``name`` are used here.
+    indentation : int
+        Nesting level of the statement that will receive the docstring.
+        Every non-blank docstring line is prefixed with four spaces per level.
+
+    Returns
+    -------
+    str or None
+        Source text of a triple-quoted string, or None when the member cannot
+        be found, has no docstring, or its docstring is empty once a leading
+        signature has been removed.
+    """
+    try:
+        obj = package.get_member(name)
+    except (KeyError, ValueError, AliasResolutionError):
+        # Some cython __init__ symbols can't be found
+        # e.g. pyarrow.lib.OSFile.__init__
+        # Retry through the parent's ``all_members`` mapping.
+        stack = name.split(".")
+        parent_name = ".".join(stack[:-1])
+
+        try:
+            obj = package.get_member(parent_name).all_members[stack[-1]]
+        except (KeyError, ValueError, AliasResolutionError):
+            print(f"{name} not found in {package.name}, it's probably ok.")
+            return None
+
+    if not obj.has_docstring:
+        return None
+
+    docstring = obj.docstring.value
+    # Remove signature if present in docstring: it is recognised by the text
+    # starting with the member name (optionally qualified by its parent), and
+    # the first two lines (signature and the line after it) are dropped.
+    starts_with_name = docstring.startswith(obj.name)
+    starts_with_qualified_name = (
+        hasattr(obj.parent, "name")
+        and docstring.startswith(f"{obj.parent.name}.{obj.name}")
+    )
+    if starts_with_name or starts_with_qualified_name:
+        docstring = "\n".join(docstring.splitlines()[2:])
+
+    # Skip empty docstrings
+    if docstring.strip() == "":
+        return None
+
+    # Indent the text and the closing quotes with one 4-space unit per level.
+    # The opening quotes are left unindented because the statement holding
+    # the literal is indented by the code generator.
+    indentation_prefix = indentation * "    "
+    docstring = indent(docstring + '\n"""', indentation_prefix)
+    return '"""\n' + docstring
+
+
+class ReplaceEllipsis(libcst.CSTTransformer):
+    """Fill ``...`` placeholders of a stub module with runtime docstrings.
+
+    ``package`` is handed to ``_get_docstring`` for lookups and ``namespace``
+    is the dotted prefix (possibly empty) prepended to every name found in
+    the stub. ``stack`` holds the names of the classes and functions that are
+    currently being visited; ``indentation`` is their nesting depth.
+    """
+
+    def __init__(self, package, namespace):
+        super().__init__()
+        self.package = package
+        self.base_namespace = namespace
+        self.stack = []
+        self.indentation = 0
+
+    def _qualified_name(self, name):
+        """Prefix ``name`` with the base namespace when one is set."""
+        if self.base_namespace:
+            return f"{self.base_namespace}.{name}"
+        return name
+
+    @staticmethod
+    def _docstring_statement(docstring):
+        """Wrap docstring source text in a standalone statement line.
+
+        A ``SimpleStatementLine`` emits its own indentation and trailing
+        newline, so no hand-written whitespace nodes are needed around it.
+        """
+        return libcst.SimpleStatementLine(
+            body=[libcst.Expr(value=libcst.SimpleString(value=docstring))]
+        )
+
+    # Insert module level docstring if _clone_signature is used
+    def leave_Module(self, original_node, updated_node):
+        """Add a docstring statement after each ``x = _clone_signature(...)``."""
+        clone_matcher = m.SimpleStatementLine(
+            body=[m.Assign(
+                value=m.Call(func=m.Name(value="_clone_signature"))
+            ), m.ZeroOrMore()]
+        )
+        new_body = []
+        for statement in updated_node.body:
+            new_body.append(statement)
+            if not m.matches(statement, clone_matcher):
+                continue
+            # Name of the first assignment target, e.g. ``x`` in ``x = ...``.
+            name = self._qualified_name(
+                statement.body[0].targets[0].target.value)
+            docstring = _get_docstring(name, self.package, 0)
+            if docstring is not None:
+                new_body.append(self._docstring_statement(docstring))
+
+        return updated_node.with_changes(body=new_body)
+
+    def visit_ClassDef(self, node):
+        """Record that traversal entered a class."""
+        self.stack.append(node.name.value)
+        self.indentation += 1
+
+    def leave_ClassDef(self, original_node, updated_node):
+        """Give the class a docstring if its body starts with ``...`` or a def."""
+        name = self._qualified_name(".".join(self.stack))
+
+        # Body whose first statement line starts with a bare ``...``.
+        ellipsis_first = m.ClassDef(
+            name=m.Name(),
+            body=m.IndentedBlock(
+                body=[m.SimpleStatementLine(
+                    body=[m.Expr(m.Ellipsis()), m.ZeroOrMore()]
+                ), m.ZeroOrMore()]
+            )
+        )
+        # Body whose first statement is a function definition.
+        function_first = m.ClassDef(
+            name=m.Name(),
+            body=m.IndentedBlock(
+                body=[m.FunctionDef(), m.ZeroOrMore()]
+            )
+        )
+
+        # The two shapes are mutually exclusive: the first statement is
+        # either a simple statement line or a function definition.
+        if m.matches(updated_node, ellipsis_first):
+            docstring = _get_docstring(name, self.package, self.indentation)
+            if docstring is not None:
+                # Swap the ``...`` expression for the docstring literal.
+                updated_node = updated_node.deep_replace(
+                    updated_node.body.body[0].body[0].value,
+                    libcst.SimpleString(value=docstring))
+        elif m.matches(updated_node, function_first):
+            docstring = _get_docstring(name, self.package, self.indentation)
+            if docstring is not None:
+                # Prepend to the existing block so that its own indentation
+                # and header are kept as parsed.
+                block = updated_node.body
+                updated_node = updated_node.with_changes(
+                    body=block.with_changes(
+                        body=[self._docstring_statement(docstring),
+                              *block.body]))
+
+        self.stack.pop()
+        self.indentation -= 1
+        return updated_node
+
+    def visit_FunctionDef(self, node):
+        """Record that traversal entered a function."""
+        self.stack.append(node.name.value)
+        self.indentation += 1
+
+    def leave_FunctionDef(self, original_node, updated_node):
+        """Replace a one-line ``def f(...): ...`` body with the docstring."""
+        name = self._qualified_name(".".join(self.stack))
+
+        function_matcher = m.FunctionDef(
+            name=m.Name(),
+            body=m.SimpleStatementSuite(
+                body=[m.Expr(
+                    m.Ellipsis()
+                )]))
+        if m.matches(original_node, function_matcher):
+            docstring = _get_docstring(name, self.package, self.indentation)
+            if docstring is not None:
+                # The one-line suite becomes an indented block holding only
+                # the docstring statement.
+                new_body = libcst.IndentedBlock(
+                    body=[self._docstring_statement(docstring)])
+                updated_node = updated_node.with_changes(body=new_body)
+
+        self.stack.pop()
+        self.indentation -= 1
+        return updated_node
+
+
+@click.command()
+@click.argument('pyarrow_folder', type=click.Path(resolve_path=True))
+def add_docs_to_stub_files(pyarrow_folder):
+    """Insert runtime docstrings into the stub files below PYARROW_FOLDER.
+
+    The folder is searched recursively for ``*.pyi`` files, which are
+    rewritten in place.
+    """
+    print("Updating docstrings of stub files in:", pyarrow_folder)
+    package = griffe.load("pyarrow", try_relative_path=True,
+                          force_inspection=True, resolve_aliases=True)
+    # Stub modules whose members are looked up under ``lib``.
+    lib_modules = ["array", "builder", "compat", "config", "device", "error", "io",
+                   "_ipc", "memory", "pandas_shim", "scalar", "table", "tensor",
+                   "_types"]
+
+    for stub_file in Path(pyarrow_folder).rglob('*.pyi'):
+        if stub_file.name == "_stubs_typing.pyi":
+            continue
+        module = stub_file.with_suffix('').name
+        print(f"[{stub_file} {module}]")
+
+        # Read and write explicitly as UTF-8: the locale default encoding
+        # (e.g. on Windows) may be unable to represent docstring characters.
+        tree = libcst.parse_module(stub_file.read_text(encoding="utf-8"))
+
+        # Map the stub file name to the namespace used for member lookups.
+        if module in lib_modules:
+            module = "lib"
+        elif stub_file.parent.name in ["parquet", "interchange"]:
+            module = f"{stub_file.parent.name}.{module}"
+        elif module == "__init__":
+            module = ""
+
+        modified_tree = tree.visit(ReplaceEllipsis(package, module))
+        stub_file.write_text(modified_tree.code, encoding="utf-8")
+        print("\n")
+
+
+if __name__ == "__main__":
+    # Extra keyword arguments are forwarded by click to the command context.
+    add_docs_to_stub_files(obj={})
diff --git a/docs/source/developers/python/development.rst b/docs/source/developers/python/development.rst
index 5529ad25a294..2e2413522439 100644
--- a/docs/source/developers/python/development.rst
+++ b/docs/source/developers/python/development.rst
@@ -42,7 +42,7 @@ Unit Testing
============
We are using `pytest `_ to develop our unit
-test suite. After `building the project `_ you can run its unit tests
+test suite. After `building the project `_ you can run its unit tests
like so:
.. code-block::
diff --git a/python/pyarrow-stubs/pyarrow/__init__.pyi b/python/pyarrow-stubs/pyarrow/__init__.pyi
index ccec8d5abc07..a38ddaa6fe3e 100644
--- a/python/pyarrow-stubs/pyarrow/__init__.pyi
+++ b/python/pyarrow-stubs/pyarrow/__init__.pyi
@@ -15,15 +15,682 @@
# specific language governing permissions and limitations
# under the License.
-"""Type stubs for PyArrow.
+from typing import Any
+import pyarrow.lib as _lib
-This is a placeholder stub file.
-Complete type annotations will be added in subsequent PRs.
-"""
+from pyarrow.lib import (
+ BuildInfo,
+ CppBuildInfo,
+ RuntimeInfo,
+ set_timezone_db_path,
+ MonthDayNano,
+ VersionInfo,
+ build_info,
+ cpp_build_info,
+ cpp_version,
+ cpp_version_info,
+ runtime_info,
+ cpu_count,
+ set_cpu_count,
+ enable_signal_handlers,
+ io_thread_count,
+ set_io_thread_count,
+)
+
+from pyarrow.lib import (
+ null,
+ bool_,
+ int8,
+ int16,
+ int32,
+ int64,
+ uint8,
+ uint16,
+ uint32,
+ uint64,
+ time32,
+ time64,
+ timestamp,
+ date32,
+ date64,
+ duration,
+ month_day_nano_interval,
+ float16,
+ float32,
+ float64,
+ binary,
+ string,
+ utf8,
+ binary_view,
+ string_view,
+ large_binary,
+ large_string,
+ large_utf8,
+ decimal32,
+ decimal64,
+ decimal128,
+ decimal256,
+ list_,
+ large_list,
+ list_view,
+ large_list_view,
+ map_,
+ struct,
+ union,
+ sparse_union,
+ dense_union,
+ dictionary,
+ run_end_encoded,
+ json_,
+ uuid,
+ fixed_shape_tensor,
+ bool8,
+ opaque,
+ field,
+ type_for_alias,
+ DataType,
+ DictionaryType,
+ StructType,
+ ListType,
+ LargeListType,
+ FixedSizeListType,
+ ListViewType,
+ LargeListViewType,
+ MapType,
+ UnionType,
+ SparseUnionType,
+ DenseUnionType,
+ TimestampType,
+ Time32Type,
+ Time64Type,
+ DurationType,
+ FixedSizeBinaryType,
+ Decimal32Type,
+ Decimal64Type,
+ Decimal128Type,
+ Decimal256Type,
+ BaseExtensionType,
+ ExtensionType,
+ RunEndEncodedType,
+ FixedShapeTensorType,
+ Bool8Type,
+ UuidType,
+ JsonType,
+ OpaqueType,
+ UnknownExtensionType,
+ register_extension_type,
+ unregister_extension_type,
+ DictionaryMemo,
+ KeyValueMetadata,
+ Field,
+ Schema,
+ schema,
+ unify_schemas,
+ Array,
+ Tensor,
+ array,
+ arange,
+ chunked_array,
+ record_batch,
+ nulls,
+ repeat,
+ SparseCOOTensor,
+ SparseCSRMatrix,
+ SparseCSCMatrix,
+ SparseCSFTensor,
+ infer_type,
+ from_numpy_dtype,
+ NullArray,
+ NumericArray,
+ IntegerArray,
+ FloatingPointArray,
+ BooleanArray,
+ Int8Array,
+ UInt8Array,
+ Int16Array,
+ UInt16Array,
+ Int32Array,
+ UInt32Array,
+ Int64Array,
+ UInt64Array,
+ HalfFloatArray,
+ FloatArray,
+ DoubleArray,
+ ListArray,
+ LargeListArray,
+ FixedSizeListArray,
+ ListViewArray,
+ LargeListViewArray,
+ MapArray,
+ UnionArray,
+ BinaryArray,
+ StringArray,
+ LargeBinaryArray,
+ LargeStringArray,
+ BinaryViewArray,
+ StringViewArray,
+ FixedSizeBinaryArray,
+ DictionaryArray,
+ Date32Array,
+ Date64Array,
+ TimestampArray,
+ Time32Array,
+ Time64Array,
+ DurationArray,
+ MonthDayNanoIntervalArray,
+ Decimal32Array,
+ Decimal64Array,
+ Decimal128Array,
+ Decimal256Array,
+ StructArray,
+ ExtensionArray,
+ RunEndEncodedArray,
+ FixedShapeTensorArray,
+ Bool8Array,
+ UuidArray,
+ JsonArray,
+ OpaqueArray,
+ scalar,
+ NA,
+ _NULL as NULL,
+ Scalar,
+ NullScalar,
+ BooleanScalar,
+ Int8Scalar,
+ Int16Scalar,
+ Int32Scalar,
+ Int64Scalar,
+ UInt8Scalar,
+ UInt16Scalar,
+ UInt32Scalar,
+ UInt64Scalar,
+ HalfFloatScalar,
+ FloatScalar,
+ DoubleScalar,
+ Decimal32Scalar,
+ Decimal64Scalar,
+ Decimal128Scalar,
+ Decimal256Scalar,
+ ListScalar,
+ LargeListScalar,
+ FixedSizeListScalar,
+ ListViewScalar,
+ LargeListViewScalar,
+ Date32Scalar,
+ Date64Scalar,
+ Time32Scalar,
+ Time64Scalar,
+ TimestampScalar,
+ DurationScalar,
+ MonthDayNanoIntervalScalar,
+ BinaryScalar,
+ LargeBinaryScalar,
+ BinaryViewScalar,
+ StringScalar,
+ LargeStringScalar,
+ StringViewScalar,
+ FixedSizeBinaryScalar,
+ DictionaryScalar,
+ MapScalar,
+ StructScalar,
+ UnionScalar,
+ RunEndEncodedScalar,
+ ExtensionScalar,
+ Bool8Scalar,
+ UuidScalar,
+ JsonScalar,
+ OpaqueScalar,
+)
+
+
+# Buffers, allocation
+from pyarrow.lib import (
+ DeviceAllocationType,
+ Device,
+ MemoryManager,
+ default_cpu_memory_manager
+)
+
+from pyarrow.lib import (
+ Buffer,
+ ResizableBuffer,
+ foreign_buffer,
+ py_buffer,
+ Codec,
+ compress,
+ decompress,
+ allocate_buffer,
+)
+
+from pyarrow.lib import (
+ MemoryPool,
+ LoggingMemoryPool,
+ ProxyMemoryPool,
+ total_allocated_bytes,
+ set_memory_pool,
+ default_memory_pool,
+ system_memory_pool,
+ jemalloc_memory_pool,
+ mimalloc_memory_pool,
+ logging_memory_pool,
+ proxy_memory_pool,
+ log_memory_allocations,
+ jemalloc_set_decay_ms,
+ supported_memory_backends,
+)
+
+# I/O
+from pyarrow.lib import (
+ NativeFile,
+ PythonFile,
+ BufferedInputStream,
+ BufferedOutputStream,
+ CacheOptions,
+ CompressedInputStream,
+ CompressedOutputStream,
+ TransformInputStream,
+ transcoding_input_stream,
+ FixedSizeBufferWriter,
+ BufferReader,
+ BufferOutputStream,
+ OSFile,
+ MemoryMappedFile,
+ memory_map,
+ create_memory_map,
+ MockOutputStream,
+ input_stream,
+ output_stream,
+ have_libhdfs,
+)
+
+from pyarrow.lib import (
+ ChunkedArray,
+ RecordBatch,
+ Table,
+ table,
+ concat_arrays,
+ concat_batches,
+ concat_tables,
+ TableGroupBy,
+ RecordBatchReader,
+)
+
+# Exceptions
+from pyarrow.lib import (
+ ArrowCancelled,
+ ArrowCapacityError,
+ ArrowException,
+ ArrowKeyError,
+ ArrowIndexError,
+ ArrowInvalid,
+ ArrowIOError,
+ ArrowMemoryError,
+ ArrowNotImplementedError,
+ ArrowTypeError,
+ ArrowSerializationError,
+)
+
+from pyarrow.ipc import serialize_pandas, deserialize_pandas
+import pyarrow.ipc as ipc
+import pyarrow.lib as lib
+import pyarrow.types as types
+import pyarrow.feather as feather
+import pyarrow.compute as compute
+import pyarrow.csv as csv
+import pyarrow.json as json
+import pyarrow.dataset as dataset
+
+# ----------------------------------------------------------------------
+# Deprecations
+
+from pyarrow.util import _deprecate_api, _deprecate_class
+
+from pyarrow.ipc import (
+ Message,
+ MessageReader,
+ MetadataVersion,
+ RecordBatchFileReader,
+ RecordBatchFileWriter,
+ RecordBatchStreamReader,
+ RecordBatchStreamWriter,
+)
+
+
+# NOTE(review): pyarrow/__init__.py falls back to ``__version__ = None`` when
+# neither the generated version file nor setuptools_scm can be imported, yet
+# the attribute is declared as ``str`` here -- confirm this is intended.
+__version__: str
+_gc_enabled: bool
+
+
+# Signatures only; the implementations live in the runtime module.
+def show_versions() -> None: ...
+def show_info() -> None: ...
+def _module_is_available(module: str) -> bool: ...
+def _filesystem_is_available(fs: str) -> bool: ...
+
+
+# Build-related helpers (include / library lookup); signatures only.
+def get_include() -> str: ...
+def _get_pkg_config_executable() -> str: ...
+def _has_pkg_config(pkgname: str) -> bool: ...
+def _read_pkg_config_variable(pkgname: str, cli_args: list[str]) -> str: ...
+def get_libraries() -> list[str]: ...
+def create_library_symlinks() -> None: ...
+def get_library_dirs() -> list[str]: ...
-from typing import Any
-# TODO(GH-48970): remove __getattr__ before release as this
-# will annotate non-existing attributes as Any.
-# https://github.com/apache/arrow/issues/48970
-def __getattr__(name: str) -> Any: ...
+__all__ = [
+ "__version__",
+ "_lib",
+ "_gc_enabled",
+ "BuildInfo",
+ "CppBuildInfo",
+ "RuntimeInfo",
+ "set_timezone_db_path",
+ "MonthDayNano",
+ "VersionInfo",
+ "build_info",
+ "cpp_build_info",
+ "cpp_version",
+ "cpp_version_info",
+ "runtime_info",
+ "cpu_count",
+ "set_cpu_count",
+ "enable_signal_handlers",
+ "io_thread_count",
+ "set_io_thread_count",
+ "show_versions",
+ "show_info",
+ "_module_is_available",
+ "_filesystem_is_available",
+ "null",
+ "bool_",
+ "int8",
+ "int16",
+ "int32",
+ "int64",
+ "uint8",
+ "uint16",
+ "uint32",
+ "uint64",
+ "time32",
+ "time64",
+ "timestamp",
+ "date32",
+ "date64",
+ "duration",
+ "month_day_nano_interval",
+ "float16",
+ "float32",
+ "float64",
+ "binary",
+ "string",
+ "utf8",
+ "binary_view",
+ "string_view",
+ "large_binary",
+ "large_string",
+ "large_utf8",
+ "decimal32",
+ "decimal64",
+ "decimal128",
+ "decimal256",
+ "list_",
+ "large_list",
+ "list_view",
+ "large_list_view",
+ "map_",
+ "struct",
+ "union",
+ "sparse_union",
+ "dense_union",
+ "dictionary",
+ "run_end_encoded",
+ "json_",
+ "uuid",
+ "fixed_shape_tensor",
+ "bool8",
+ "opaque",
+ "field",
+ "type_for_alias",
+ "DataType",
+ "DictionaryType",
+ "StructType",
+ "ListType",
+ "LargeListType",
+ "FixedSizeListType",
+ "ListViewType",
+ "LargeListViewType",
+ "MapType",
+ "UnionType",
+ "SparseUnionType",
+ "DenseUnionType",
+ "TimestampType",
+ "Time32Type",
+ "Time64Type",
+ "DurationType",
+ "FixedSizeBinaryType",
+ "Decimal32Type",
+ "Decimal64Type",
+ "Decimal128Type",
+ "Decimal256Type",
+ "BaseExtensionType",
+ "ExtensionType",
+ "RunEndEncodedType",
+ "FixedShapeTensorType",
+ "Bool8Type",
+ "UuidType",
+ "JsonType",
+ "OpaqueType",
+ "UnknownExtensionType",
+ "register_extension_type",
+ "unregister_extension_type",
+ "DictionaryMemo",
+ "KeyValueMetadata",
+ "Field",
+ "Schema",
+ "schema",
+ "unify_schemas",
+ "Array",
+ "Tensor",
+ "array",
+ "arange",
+ "chunked_array",
+ "record_batch",
+ "nulls",
+ "repeat",
+ "SparseCOOTensor",
+ "SparseCSRMatrix",
+ "SparseCSCMatrix",
+ "SparseCSFTensor",
+ "infer_type",
+ "from_numpy_dtype",
+ "NullArray",
+ "NumericArray",
+ "IntegerArray",
+ "FloatingPointArray",
+ "BooleanArray",
+ "Int8Array",
+ "UInt8Array",
+ "Int16Array",
+ "UInt16Array",
+ "Int32Array",
+ "UInt32Array",
+ "Int64Array",
+ "UInt64Array",
+ "HalfFloatArray",
+ "FloatArray",
+ "DoubleArray",
+ "ListArray",
+ "LargeListArray",
+ "FixedSizeListArray",
+ "ListViewArray",
+ "LargeListViewArray",
+ "MapArray",
+ "UnionArray",
+ "BinaryArray",
+ "StringArray",
+ "LargeBinaryArray",
+ "LargeStringArray",
+ "BinaryViewArray",
+ "StringViewArray",
+ "FixedSizeBinaryArray",
+ "DictionaryArray",
+ "Date32Array",
+ "Date64Array",
+ "TimestampArray",
+ "Time32Array",
+ "Time64Array",
+ "DurationArray",
+ "MonthDayNanoIntervalArray",
+ "Decimal32Array",
+ "Decimal64Array",
+ "Decimal128Array",
+ "Decimal256Array",
+ "StructArray",
+ "ExtensionArray",
+ "Bool8Array",
+ "UuidArray",
+ "JsonArray",
+ "OpaqueArray",
+ "RunEndEncodedArray",
+ "FixedShapeTensorArray",
+ "scalar",
+ "NA",
+ "NULL",
+ "Scalar",
+ "NullScalar",
+ "BooleanScalar",
+ "Int8Scalar",
+ "Int16Scalar",
+ "Int32Scalar",
+ "Int64Scalar",
+ "UInt8Scalar",
+ "UInt16Scalar",
+ "UInt32Scalar",
+ "UInt64Scalar",
+ "HalfFloatScalar",
+ "FloatScalar",
+ "DoubleScalar",
+ "Decimal32Scalar",
+ "Decimal64Scalar",
+ "Decimal128Scalar",
+ "Decimal256Scalar",
+ "ListScalar",
+ "LargeListScalar",
+ "FixedSizeListScalar",
+ "ListViewScalar",
+ "LargeListViewScalar",
+ "Date32Scalar",
+ "Date64Scalar",
+ "Time32Scalar",
+ "Time64Scalar",
+ "TimestampScalar",
+ "DurationScalar",
+ "MonthDayNanoIntervalScalar",
+ "BinaryScalar",
+ "LargeBinaryScalar",
+ "BinaryViewScalar",
+ "StringScalar",
+ "LargeStringScalar",
+ "StringViewScalar",
+ "FixedSizeBinaryScalar",
+ "DictionaryScalar",
+ "MapScalar",
+ "StructScalar",
+ "UnionScalar",
+ "RunEndEncodedScalar",
+ "ExtensionScalar",
+ "Bool8Scalar",
+ "UuidScalar",
+ "JsonScalar",
+ "OpaqueScalar",
+ "DeviceAllocationType",
+ "Device",
+ "MemoryManager",
+ "default_cpu_memory_manager",
+ "Buffer",
+ "ResizableBuffer",
+ "foreign_buffer",
+ "py_buffer",
+ "Codec",
+ "compress",
+ "decompress",
+ "allocate_buffer",
+ "MemoryPool",
+ "LoggingMemoryPool",
+ "ProxyMemoryPool",
+ "total_allocated_bytes",
+ "set_memory_pool",
+ "default_memory_pool",
+ "system_memory_pool",
+ "jemalloc_memory_pool",
+ "mimalloc_memory_pool",
+ "logging_memory_pool",
+ "proxy_memory_pool",
+ "log_memory_allocations",
+ "jemalloc_set_decay_ms",
+ "supported_memory_backends",
+ "NativeFile",
+ "PythonFile",
+ "BufferedInputStream",
+ "BufferedOutputStream",
+ "CacheOptions",
+ "CompressedInputStream",
+ "CompressedOutputStream",
+ "TransformInputStream",
+ "transcoding_input_stream",
+ "FixedSizeBufferWriter",
+ "BufferReader",
+ "BufferOutputStream",
+ "OSFile",
+ "MemoryMappedFile",
+ "memory_map",
+ "create_memory_map",
+ "MockOutputStream",
+ "input_stream",
+ "output_stream",
+ "have_libhdfs",
+ "ChunkedArray",
+ "RecordBatch",
+ "Table",
+ "table",
+ "concat_arrays",
+ "concat_batches",
+ "concat_tables",
+ "TableGroupBy",
+ "RecordBatchReader",
+ "ArrowCancelled",
+ "ArrowCapacityError",
+ "ArrowException",
+ "ArrowKeyError",
+ "ArrowIndexError",
+ "ArrowInvalid",
+ "ArrowIOError",
+ "ArrowMemoryError",
+ "ArrowNotImplementedError",
+ "ArrowTypeError",
+ "ArrowSerializationError",
+ "serialize_pandas",
+ "deserialize_pandas",
+ "lib",
+ "ipc",
+ "types",
+ "_deprecate_api",
+ "_deprecate_class",
+ "Message",
+ "MessageReader",
+ "MetadataVersion",
+ "RecordBatchFileReader",
+ "RecordBatchFileWriter",
+ "RecordBatchStreamReader",
+ "RecordBatchStreamWriter",
+ "get_include",
+ "_get_pkg_config_executable",
+ "compute",
+ "feather",
+ "csv",
+ "json",
+ "_has_pkg_config",
+ "_read_pkg_config_variable",
+ "get_libraries",
+ "create_library_symlinks",
+ "dataset",
+ "get_library_dirs",
+]
diff --git a/python/pyarrow-stubs/pyarrow/tests/util.pyi b/python/pyarrow-stubs/pyarrow/tests/util.pyi
new file mode 100644
index 000000000000..5ceb784588a7
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/tests/util.pyi
@@ -0,0 +1,93 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from collections.abc import Callable
+from contextlib import AbstractContextManager
+from decimal import Decimal
+from os import PathLike
+from typing import Any, Literal
+import socket
+
+import pyarrow.fs
+
+
+# Signatures only. Helpers typed as AbstractContextManager[None] are meant for
+# ``with`` statements and bind no value.
+def randsign() -> int: ...
+def random_seed(seed: int) -> AbstractContextManager[None]: ...
+def randdecimal(precision: int, scale: int) -> Decimal: ...
+def random_ascii(length: int) -> bytes: ...
+def rands(nchars: int) -> str: ...
+def get_modified_env_with_pythonpath() -> dict[str, str]: ...
+def invoke_script(script_name: str, *args: str) -> None: ...
+def changed_environ(name: str, value: str) -> AbstractContextManager[None]: ...
+def change_cwd(path: str | PathLike[str]) -> AbstractContextManager[None]: ...
+def disabled_gc() -> AbstractContextManager[None]: ...
+def _filesystem_uri(path: str) -> str: ...
+
+
+# ``f`` is a zero-argument callable; ``metric`` accepts only 'rss', 'vms' or
+# 'shared'. The default threshold 131072 equals 1 << 17 (presumably bytes --
+# TODO confirm against the implementation).
+def memory_leak_check(
+    f: Callable[[], Any],
+    metric: Literal['rss', 'vms', 'shared'] = 'rss',
+    threshold: int = 131072,
+    iterations: int = 10,
+    check_interval: int = 1
+) -> None: ...
+
+
+# Built from a path; ``__fspath__`` returns a ``str``, so instances can be
+# passed wherever an ``os.PathLike`` object is accepted.
+class FSProtocolClass:
+    def __init__(self, path: str | PathLike[str]) -> None: ...
+    def __fspath__(self) -> str: ...
+
+
+# FileSystemHandler subclass constructed from a FileSystem, which is kept in
+# ``_fs`` (presumably every method delegates to it -- confirm against
+# pyarrow/tests/util.py).
+class ProxyHandler(pyarrow.fs.FileSystemHandler):
+    _fs: pyarrow.fs.FileSystem
+    def __init__(self, fs: pyarrow.fs.FileSystem) -> None: ...
+    def __eq__(self, other: object) -> bool: ...
+    def __ne__(self, other: object) -> bool: ...
+    def get_type_name(self) -> str: ...
+    def normalize_path(self, path: str) -> str: ...
+    def get_file_info(self, paths: list[str]) -> list[pyarrow.fs.FileInfo]: ...
+    def get_file_info_selector(
+        self, selector: pyarrow.fs.FileSelector) -> list[pyarrow.fs.FileInfo]: ...
+
+    def create_dir(self, path: str, recursive: bool) -> None: ...
+    def delete_dir(self, path: str) -> None: ...
+    def delete_dir_contents(self, path: str, missing_dir_ok: bool = False) -> None: ...
+    def delete_root_dir_contents(self) -> None: ...
+    def delete_file(self, path: str) -> None: ...
+    def move(self, src: str, dest: str) -> None: ...
+    def copy_file(self, src: str, dest: str) -> None: ...
+    # The stream-opening methods are typed ``Any``: no stream type is declared.
+    def open_input_stream(self, path: str) -> Any: ...
+    def open_input_file(self, path: str) -> Any: ...
+    def open_output_stream(self, path: str, metadata: dict[str, str]) -> Any: ...
+    def open_append_stream(self, path: str, metadata: dict[str, str]) -> Any: ...
+
+
+# Signatures only; see pyarrow/tests/util.py for the implementations.
+def _ensure_minio_component_version(component: str, minimum_year: int) -> bool: ...
+def _run_mc_command(mcdir: str, *args: str) -> None: ...
+def windows_has_tzdata() -> bool: ...
+def running_on_musllinux() -> bool: ...
+
+
+# ``warn_on_full_buffer`` is keyword-only; the context manager yields a socket.
+def signal_wakeup_fd(
+    *, warn_on_full_buffer: bool = False) -> AbstractContextManager[socket.socket]: ...
+
+
+def _configure_s3_limited_user(
+    s3_server: dict[str, Any], policy: str, username: str, password: str) -> None: ...
+
+
+def _wait_for_minio_startup(
+    mcdir: str, address: str, access_key: str, secret_key: str) -> None: ...
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 18a40d877c34..39abd3ee5715 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -35,11 +35,11 @@
import sys as _sys
try:
- from ._generated_version import version as __version__
+ from ._generated_version import version as __version__ # type: ignore[import-untyped, import-not-found] # noqa: E501
except ImportError:
# Package is not installed, parse git tag at runtime
try:
- import setuptools_scm
+ import setuptools_scm # type: ignore[import-not-found, import-untyped]
# Code duplicated from setup.py to avoid a dependency on each other
def parse_git(root, **kwargs):
@@ -47,14 +47,14 @@ def parse_git(root, **kwargs):
Parse function for setuptools_scm that ignores tags for non-C++
subprojects, e.g. apache-arrow-js-XXX tags.
"""
- from setuptools_scm.git import parse
+ from setuptools_scm.git import parse # type: ignore[import-not-found, import-untyped] # noqa: E501
kwargs['describe_command'] = \
"git describe --dirty --tags --long --match 'apache-arrow-[0-9]*.*'"
return parse(root, **kwargs)
__version__ = setuptools_scm.get_version('../',
parse=parse_git)
except ImportError:
- __version__ = None
+ __version__ = None # type: ignore[assignment]
from pyarrow.lib import (BuildInfo, CppBuildInfo, RuntimeInfo, set_timezone_db_path,
MonthDayNano, VersionInfo, build_info, cpp_build_info,
@@ -150,6 +150,8 @@ def print_entry(label, value):
print(f" {codec: <20}: {status: <8}")
+from pyarrow.lib import (
+ DataType, Array, MemoryPool) # type: ignore[reportAttributeAccessIssue]
from pyarrow.lib import (null, bool_,
int8, int16, int32, int64,
uint8, uint16, uint32, uint64,
@@ -167,7 +169,7 @@ def print_entry(label, value):
bool8, fixed_shape_tensor, json_, opaque, uuid,
field,
type_for_alias,
- DataType, DictionaryType, StructType,
+ DictionaryType, StructType,
ListType, LargeListType, FixedSizeListType,
ListViewType, LargeListViewType,
MapType, UnionType, SparseUnionType, DenseUnionType,
@@ -184,8 +186,7 @@ def print_entry(label, value):
Field,
Schema,
schema,
- unify_schemas,
- Array, Tensor,
+ unify_schemas, Tensor,
array, chunked_array, record_batch, nulls, repeat,
SparseCOOTensor, SparseCSRMatrix, SparseCSCMatrix,
SparseCSFTensor,
@@ -240,7 +241,7 @@ def print_entry(label, value):
from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, py_buffer,
Codec, compress, decompress, allocate_buffer)
-from pyarrow.lib import (MemoryPool, LoggingMemoryPool, ProxyMemoryPool,
+from pyarrow.lib import (LoggingMemoryPool, ProxyMemoryPool,
total_allocated_bytes, set_memory_pool,
default_memory_pool, system_memory_pool,
jemalloc_memory_pool, mimalloc_memory_pool,
@@ -362,7 +363,7 @@ def create_library_symlinks():
if _sys.platform == 'linux':
bundled_libs = glob.glob(_os.path.join(package_cwd, '*.so.*'))
- def get_symlink_path(hard_path):
+ def get_symlink_path(hard_path): # type: ignore[reportRedeclaration]
return hard_path.rsplit('.', 1)[0]
else:
bundled_libs = glob.glob(_os.path.join(package_cwd, '*.*.dylib'))
diff --git a/python/pyarrow/conftest.py b/python/pyarrow/conftest.py
index 41beaa140419..0e8ef66485ec 100644
--- a/python/pyarrow/conftest.py
+++ b/python/pyarrow/conftest.py
@@ -114,13 +114,13 @@
defaults['timezone_data'] = os.path.exists("/usr/share/zoneinfo")
try:
- import cython # noqa
+ import cython # type: ignore[import-untyped, import-not-found] # noqa
defaults['cython'] = True
except ImportError:
pass
try:
- import fastparquet # noqa
+ import fastparquet # type: ignore[import-untyped, import-not-found] # noqa
defaults['fastparquet'] = True
except ImportError:
pass
@@ -347,7 +347,7 @@ def func(ctx, x):
pc.register_aggregate_function(func,
func_name,
- func_doc,
+ func_doc, # type: ignore
{
"x": pa.float64(),
},
diff --git a/python/pyarrow/tests/conftest.py b/python/pyarrow/tests/conftest.py
index 575444c1cfc2..3f227d3101c7 100644
--- a/python/pyarrow/tests/conftest.py
+++ b/python/pyarrow/tests/conftest.py
@@ -64,7 +64,8 @@
if os.environ.get('TZDIR', None) is None:
from importlib import resources
try:
- os.environ['TZDIR'] = os.path.join(resources.files('tzdata'), 'zoneinfo')
+ tzdata_path = resources.files('tzdata')
+ os.environ['TZDIR'] = os.path.join(str(tzdata_path), 'zoneinfo')
except ModuleNotFoundError:
print(
'Package "tzdata" not found. Not setting TZDIR environment variable.'
@@ -191,6 +192,7 @@ def decorate(func):
def wrapper(*args, **kwargs):
remaining_attempts = attempts
curr_delay = delay
+ last_exception = None
while remaining_attempts > 0:
try:
return func(*args, **kwargs)
@@ -201,6 +203,9 @@ def wrapper(*args, **kwargs):
if max_delay:
curr_delay = min(curr_delay, max_delay)
time.sleep(curr_delay)
+ # Reaching this point means the loop ended without a successful return.
+ # last_exception is still None only if the loop never ran (attempts <= 0).
+ assert last_exception is not None, "No attempts were made"
raise last_exception
return wrapper
return decorate
diff --git a/python/pyarrow/tests/strategies.py b/python/pyarrow/tests/strategies.py
index 3c31650ddf94..9188d5d41cc2 100644
--- a/python/pyarrow/tests/strategies.py
+++ b/python/pyarrow/tests/strategies.py
@@ -17,31 +17,32 @@
import datetime
import sys
+from typing import Any
-import pytest
-import hypothesis as h
-import hypothesis.strategies as st
+import pytest # type: ignore[import-not-found]
+import hypothesis as h # type: ignore[import-not-found]
+import hypothesis.strategies as st # type: ignore[import-not-found]
try:
- import hypothesis.extra.numpy as npst
+ import hypothesis.extra.numpy as npst # type: ignore[import-not-found]
except ImportError:
- npst = None
+ npst = None # type: ignore[assignment]
try:
- import hypothesis.extra.pytz as tzst
+ import hypothesis.extra.pytz as tzst # type: ignore[import-not-found]
except ImportError:
- tzst = None
+ tzst = None # type: ignore[assignment]
try:
import zoneinfo
except ImportError:
- zoneinfo = None
+ zoneinfo = None # type: ignore[assignment]
if sys.platform == 'win32':
try:
- import tzdata # noqa:F401
+ import tzdata # type: ignore[import-not-found, import-untyped] # noqa:F401
except ImportError:
- zoneinfo = None
+ zoneinfo = None # type: ignore[assignment]
try:
import numpy as np
except ImportError:
- np = None
+ np = None # type: ignore[assignment]
import pyarrow as pa
@@ -151,12 +152,12 @@
timezones = st.one_of(st.none(), st.timezones())
else:
timezones = st.none()
-timestamp_types = st.builds(
+timestamp_types: Any = st.builds(
pa.timestamp,
unit=st.sampled_from(['s', 'ms', 'us', 'ns']),
tz=timezones
)
-duration_types = st.builds(
+duration_types: Any = st.builds(
pa.duration,
st.sampled_from(['s', 'ms', 'us', 'ns'])
)
@@ -253,13 +254,13 @@ def schemas(type_strategy=primitive_types, max_fields=None):
all_types = st.deferred(
lambda: (
- primitive_types |
- list_types() |
- struct_types() |
- dictionary_types() |
- map_types() |
- list_types(all_types) |
- struct_types(all_types)
+ primitive_types
+ | list_types()
+ | struct_types()
+ | dictionary_types()
+ | map_types()
+ | list_types(all_types) # type: ignore[has-type]
+ | struct_types(all_types) # type: ignore[has-type]
)
)
all_fields = st.one_of(
@@ -303,6 +304,7 @@ def arrays(draw, type, size=None, nullable=True):
elif not isinstance(size, int):
raise TypeError('Size must be an integer')
+ assert npst is not None
if pa.types.is_null(ty):
h.assume(nullable)
value = st.none()
@@ -315,6 +317,7 @@ def arrays(draw, type, size=None, nullable=True):
values = draw(npst.arrays(ty.to_pandas_dtype(), shape=(size,)))
# Workaround ARROW-4952: no easy way to assert array equality
# in a NaN-tolerant way.
+ assert np is not None
values[np.isnan(values)] = -42.0
return pa.array(values, type=ty)
elif pa.types.is_decimal(ty):
@@ -340,9 +343,11 @@ def arrays(draw, type, size=None, nullable=True):
offset = ty.tz.split(":")
offset_hours = int(offset[0])
offset_min = int(offset[1])
- tz = datetime.timedelta(hours=offset_hours, minutes=offset_min)
+ tz = datetime.timezone(
+ datetime.timedelta(hours=offset_hours, minutes=offset_min)
+ )
except ValueError:
- tz = zoneinfo.ZoneInfo(ty.tz)
+ tz = zoneinfo.ZoneInfo(str(ty.tz))
value = st.datetimes(timezones=st.just(tz), min_value=min_datetime,
max_value=max_datetime)
elif pa.types.is_duration(ty):
@@ -501,7 +506,9 @@ def pandas_compatible_list_types(
dictionary_types(
value_strategy=pandas_compatible_dictionary_value_types
),
- pandas_compatible_list_types(pandas_compatible_types),
- struct_types(pandas_compatible_types)
+ pandas_compatible_list_types(
+ pandas_compatible_types # type: ignore[has-type]
+ ),
+ struct_types(pandas_compatible_types) # type: ignore[has-type]
)
)
diff --git a/python/pyarrow/tests/test_adhoc_memory_leak.py b/python/pyarrow/tests/test_adhoc_memory_leak.py
index 76a766984dab..9f61bc7ddfea 100644
--- a/python/pyarrow/tests/test_adhoc_memory_leak.py
+++ b/python/pyarrow/tests/test_adhoc_memory_leak.py
@@ -20,7 +20,7 @@
try:
import numpy as np
except ImportError:
- np = None
+ pass
import pyarrow as pa
import pyarrow.tests.util as test_util
diff --git a/python/pyarrow/tests/test_cpp_internals.py b/python/pyarrow/tests/test_cpp_internals.py
index 7508d8f0b981..7d652acf62f1 100644
--- a/python/pyarrow/tests/test_cpp_internals.py
+++ b/python/pyarrow/tests/test_cpp_internals.py
@@ -20,7 +20,8 @@
import pytest
-from pyarrow._pyarrow_cpp_tests import get_cpp_tests
+from pyarrow._pyarrow_cpp_tests import ( # type: ignore[import-not-found, import-untyped] # noqa: E501
+ get_cpp_tests)
def inject_cpp_tests(ns):
diff --git a/python/pyarrow/tests/test_cython.py b/python/pyarrow/tests/test_cython.py
index a142e66db567..11ef01412a6f 100644
--- a/python/pyarrow/tests/test_cython.py
+++ b/python/pyarrow/tests/test_cython.py
@@ -89,7 +89,7 @@ def test_cython_api(tmpdir):
Basic test for the Cython API.
"""
# Fail early if cython is not found
- import cython # noqa
+ import cython # type: ignore[import-untyped, import-not-found] # noqa
with tmpdir.as_cwd():
# Set up temporary workspace
diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py
index ebac37e862b6..941e73c8167a 100644
--- a/python/pyarrow/tests/test_extension_type.py
+++ b/python/pyarrow/tests/test_extension_type.py
@@ -22,12 +22,13 @@
import weakref
from uuid import uuid4, UUID
import sys
+from typing import cast
import pytest
try:
import numpy as np
except ImportError:
- np = None
+ pass
import pyarrow as pa
from pyarrow.vendored.version import Version
@@ -79,12 +80,14 @@ def __init__(self):
def __arrow_ext_serialize__(self):
# XXX pa.BaseExtensionType should expose C++ serialization method
+ assert isinstance(self.storage_type, IntegerType)
return self.storage_type.__arrow_ext_serialize__()
@classmethod
def __arrow_ext_deserialize__(cls, storage_type, serialized):
+ assert isinstance(storage_type, IntegerType)
deserialized_storage_type = storage_type.__arrow_ext_deserialize__(
- serialized)
+ storage_type, serialized)
assert deserialized_storage_type == storage_type
return cls()
@@ -160,7 +163,7 @@ def __arrow_ext_deserialize__(cls, storage_type, serialized):
class MyStructType(pa.ExtensionType):
- storage_type = pa.struct([('left', pa.int64()),
+ storage_type = pa.struct([('left', pa.int64()), # type: ignore[assignment]
('right', pa.int64())])
def __init__(self):
@@ -221,7 +224,7 @@ def __arrow_ext_serialize__(self):
@classmethod
def __arrow_ext_deserialize__(cls, storage_type, serialized):
assert serialized == b''
- return cls(storage_type)
+ return cls(storage_type, annotation=None)
def ipc_write_batch(batch):
@@ -432,8 +435,8 @@ def test_ext_array_wrap_array():
arr.validate(full=True)
assert isinstance(arr, pa.ChunkedArray)
assert arr.type == ty
- assert arr.chunk(0).storage == storage.chunk(0)
- assert arr.chunk(1).storage == storage.chunk(1)
+ assert arr.chunk(0).storage == storage.chunk(0) # type: ignore[union-attr]
+ assert arr.chunk(1).storage == storage.chunk(1) # type: ignore[union-attr]
# Wrong storage type
storage = pa.array([b"foo", b"bar", None])
@@ -442,7 +445,7 @@ def test_ext_array_wrap_array():
# Not an array or chunked array
with pytest.raises(TypeError, match="Expected array or chunked array"):
- ty.wrap_array(None)
+ ty.wrap_array(None) # type: ignore[arg-type]
def test_ext_scalar_from_array():
@@ -876,7 +879,7 @@ def __arrow_ext_deserialize__(cls, storage_type, serialized):
def __eq__(self, other):
if isinstance(other, pa.BaseExtensionType):
return (isinstance(self, type(other)) and
- self.freq == other.freq)
+ self.freq == other.freq) # type: ignore[attr-defined]
else:
return NotImplemented
@@ -902,7 +905,7 @@ def __arrow_ext_deserialize__(cls, storage_type, serialized):
storage_type, serialized).freq
return PeriodTypeWithToPandasDtype(freq)
- def to_pandas_dtype(self):
+ def to_pandas_dtype(self): # type: ignore[override]
import pandas as pd
return pd.PeriodDtype(freq=self.freq)
@@ -1033,7 +1036,7 @@ def test_generic_ext_array_pickling(registered_period_type, pickle_module):
def test_generic_ext_type_register(registered_period_type):
# test that trying to register other type does not segfault
with pytest.raises(TypeError):
- pa.register_extension_type(pa.string())
+ pa.register_extension_type(pa.string()) # type: ignore[arg-type]
# register second time raises KeyError
period_type = PeriodType('D')
@@ -1058,11 +1061,13 @@ def test_parquet_period(tmpdir, registered_period_type):
# in the serialized arrow schema
meta = pq.read_metadata(filename)
assert meta.schema.column(0).physical_type == "INT64"
+ assert meta.metadata is not None
assert b"ARROW:schema" in meta.metadata
import base64
decoded_schema = base64.b64decode(meta.metadata[b"ARROW:schema"])
- schema = pa.ipc.read_schema(pa.BufferReader(decoded_schema))
+ schema = pa.ipc.read_schema(pa.BufferReader(
+ decoded_schema))
# Since the type could be reconstructed, the extension type metadata is
# absent.
assert schema.field("ext").metadata == {}
@@ -1434,6 +1439,7 @@ def test_tensor_class_methods(np_type_str):
storage = pa.array([[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]],
pa.list_(arrow_type, 6))
arr = pa.ExtensionArray.from_storage(tensor_type, storage)
+ arr = cast(pa.FixedShapeTensorArray, arr)
expected = np.array(
[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]],
dtype=np.dtype(np_type_str)
@@ -1442,7 +1448,7 @@ def test_tensor_class_methods(np_type_str):
np.testing.assert_array_equal(arr.to_numpy_ndarray(), expected)
expected = np.array([[[7, 8, 9], [10, 11, 12]]], dtype=np.dtype(np_type_str))
- result = arr[1:].to_numpy_ndarray()
+ result = arr[1:].to_numpy_ndarray() # type: ignore[union-attr]
np.testing.assert_array_equal(result, expected)
values = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]
@@ -1452,35 +1458,43 @@ def test_tensor_class_methods(np_type_str):
tensor_type = pa.fixed_shape_tensor(arrow_type, [2, 2, 3], permutation=[0, 1, 2])
result = pa.ExtensionArray.from_storage(tensor_type, storage)
+ result = cast(pa.FixedShapeTensorArray, result)
expected = np.array(
[[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]],
dtype=np.dtype(np_type_str)
)
np.testing.assert_array_equal(result.to_numpy_ndarray(), expected)
- result = flat_arr.reshape(1, 2, 3, 2)
+ result_reshaped = flat_arr.reshape(1, 2, 3, 2)
expected = np.array(
[[[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]]],
dtype=np.dtype(np_type_str)
)
- np.testing.assert_array_equal(result, expected)
+ np.testing.assert_array_equal(result_reshaped, expected)
tensor_type = pa.fixed_shape_tensor(arrow_type, [2, 2, 3], permutation=[0, 2, 1])
result = pa.ExtensionArray.from_storage(tensor_type, storage)
+ result = cast(pa.FixedShapeTensorArray, result)
expected = as_strided(flat_arr, shape=(1, 2, 3, 2),
strides=(bw * 12, bw * 6, bw, bw * 3))
np.testing.assert_array_equal(result.to_numpy_ndarray(), expected)
tensor_type = pa.fixed_shape_tensor(arrow_type, [2, 2, 3], permutation=[2, 0, 1])
- result = pa.ExtensionArray.from_storage(tensor_type, storage)
+ result = pa.ExtensionArray.from_storage(
+ tensor_type, storage) # type: ignore[assignment]
expected = as_strided(flat_arr, shape=(1, 3, 2, 2),
strides=(bw * 12, bw, bw * 6, bw * 2))
- np.testing.assert_array_equal(result.to_numpy_ndarray(), expected)
-
- assert result.type.permutation == [2, 0, 1]
- assert result.type.shape == [2, 2, 3]
+ np.testing.assert_array_equal(
+ result.to_numpy_ndarray(), expected) # type: ignore[union-attr]
+
+ result_type = result.type
+ assert isinstance(result, pa.FixedShapeTensorArray)
+ assert isinstance(result_type, pa.FixedShapeTensorType)
+ assert result_type.permutation == [2, 0, 1]
+ assert result_type.shape == [2, 2, 3]
assert result.to_tensor().shape == (1, 3, 2, 2)
- assert result.to_tensor().strides == (12 * bw, 1 * bw, 6 * bw, 2 * bw)
+ assert result.to_tensor().strides == (12 * bw, 1 * bw, 6 * bw,
+ 2 * bw)
@pytest.mark.numpy
@@ -1508,17 +1522,23 @@ def test_tensor_array_from_numpy(np_type_str):
arr = flat_arr.reshape(1, 3, 4)
tensor_array_from_numpy = pa.FixedShapeTensorArray.from_numpy_ndarray(arr)
- assert tensor_array_from_numpy.type.shape == [3, 4]
- assert tensor_array_from_numpy.type.permutation == [0, 1]
- assert tensor_array_from_numpy.type.dim_names is None
+ result_type = tensor_array_from_numpy.type
+ assert isinstance(tensor_array_from_numpy, pa.FixedShapeTensorArray)
+ assert isinstance(result_type, pa.FixedShapeTensorType)
+ assert result_type.shape == [3, 4]
+ assert result_type.permutation == [0, 1]
+ assert result_type.dim_names is None
assert tensor_array_from_numpy.to_tensor() == pa.Tensor.from_numpy(arr)
arr = as_strided(flat_arr, shape=(1, 2, 3, 2),
strides=(bw * 12, bw * 6, bw, bw * 3))
tensor_array_from_numpy = pa.FixedShapeTensorArray.from_numpy_ndarray(arr)
- assert tensor_array_from_numpy.type.shape == [2, 2, 3]
- assert tensor_array_from_numpy.type.permutation == [0, 2, 1]
- assert tensor_array_from_numpy.type.dim_names is None
+ result_type = tensor_array_from_numpy.type
+ assert isinstance(tensor_array_from_numpy, pa.FixedShapeTensorArray)
+ assert isinstance(result_type, pa.FixedShapeTensorType)
+ assert result_type.shape == [2, 2, 3]
+ assert result_type.permutation == [0, 2, 1]
+ assert result_type.dim_names is None
assert tensor_array_from_numpy.to_tensor() == pa.Tensor.from_numpy(arr)
arr = flat_arr.reshape(1, 2, 3, 2)
@@ -1532,7 +1552,8 @@ def test_tensor_array_from_numpy(np_type_str):
arr = np.array([[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]],
dtype=np.dtype(np_type_str))
expected = arr[1:]
- result = pa.FixedShapeTensorArray.from_numpy_ndarray(arr)[1:].to_numpy_ndarray()
+ result = cast(pa.FixedShapeTensorArray, pa.FixedShapeTensorArray.from_numpy_ndarray(
+ arr)[1:]).to_numpy_ndarray()
np.testing.assert_array_equal(result, expected)
arr = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype=np.dtype(np_type_str))
@@ -1559,22 +1580,27 @@ def test_tensor_array_from_numpy(np_type_str):
dim_names = ["a", "b"]
tensor_array_from_numpy = pa.FixedShapeTensorArray.from_numpy_ndarray(
arr, dim_names=dim_names)
- assert tensor_array_from_numpy.type.value_type == arrow_type
- assert tensor_array_from_numpy.type.shape == [2, 3]
- assert tensor_array_from_numpy.type.dim_names == dim_names
+ result_type = tensor_array_from_numpy.type
+ assert isinstance(tensor_array_from_numpy, pa.FixedShapeTensorArray)
+ assert isinstance(result_type, pa.FixedShapeTensorType)
+ assert result_type.value_type == arrow_type
+ assert result_type.shape == [2, 3]
+ assert result_type.dim_names == dim_names
with pytest.raises(ValueError, match="The length of dim_names"):
pa.FixedShapeTensorArray.from_numpy_ndarray(arr, dim_names=['only_one'])
with pytest.raises(TypeError, match="dim_names must be a tuple or list"):
- pa.FixedShapeTensorArray.from_numpy_ndarray(arr, dim_names=123)
+ pa.FixedShapeTensorArray.from_numpy_ndarray(
+ arr, dim_names=123) # type: ignore[arg-type]
with pytest.raises(TypeError, match="dim_names must be a tuple or list"):
pa.FixedShapeTensorArray.from_numpy_ndarray(
- arr, dim_names=(x for x in range(2)))
+ arr, dim_names=(x for x in range(2))) # type: ignore[arg-type]
with pytest.raises(TypeError, match="Each element of dim_names must be a string"):
- pa.FixedShapeTensorArray.from_numpy_ndarray(arr, dim_names=[0, 1])
+ pa.FixedShapeTensorArray.from_numpy_ndarray(
+ arr, dim_names=[0, 1]) # type: ignore[arg-type]
@pytest.mark.numpy
@@ -1845,14 +1871,18 @@ def test_bool8_to_numpy_conversion():
assert np.array_equal(arr_to_np, np_arr_no_nulls)
# same underlying buffer
- assert arr_to_np.ctypes.data == arr_no_nulls.buffers()[1].address
+ buffer = arr_no_nulls.buffers()[1]
+ assert buffer is not None
+ assert arr_to_np.ctypes.data == buffer.address
# if the user requests a writable array, a copy should be performed
arr_to_np_writable = arr_no_nulls.to_numpy(zero_copy_only=False, writable=True)
assert np.array_equal(arr_to_np_writable, np_arr_no_nulls)
# different underlying buffer
- assert arr_to_np_writable.ctypes.data != arr_no_nulls.buffers()[1].address
+ buffer = arr_no_nulls.buffers()[1]
+ assert buffer is not None
+ assert arr_to_np_writable.ctypes.data != buffer.address
@pytest.mark.numpy
@@ -1867,7 +1897,9 @@ def test_bool8_from_numpy_conversion():
assert arr_from_np == canonical_bool8_arr_no_nulls
# same underlying buffer
- assert arr_from_np.buffers()[1].address == np_arr_no_nulls.ctypes.data
+ buffer = arr_from_np.buffers()[1]
+ assert buffer is not None
+ assert buffer.address == np_arr_no_nulls.ctypes.data
# conversion only valid for 1-D arrays
with pytest.raises(
@@ -1882,7 +1914,7 @@ def test_bool8_from_numpy_conversion():
ValueError,
match="Cannot convert 0-D array to bool8 array",
):
- pa.Bool8Array.from_numpy(np.bool_())
+ pa.Bool8Array.from_numpy(np.bool_(False)) # type: ignore[arg-type]
# must use compatible storage type
with pytest.raises(
diff --git a/python/pyarrow/tests/test_gdb.py b/python/pyarrow/tests/test_gdb.py
index 912953ae60d2..50d81b686aca 100644
--- a/python/pyarrow/tests/test_gdb.py
+++ b/python/pyarrow/tests/test_gdb.py
@@ -101,6 +101,8 @@ def wait_until_ready(self):
Record output until the gdb prompt displays. Return recorded output.
"""
# TODO: add timeout?
+ assert self.proc is not None
+ assert self.proc.stdout is not None
while (not self.last_stdout_line.startswith(b"(gdb) ") and
self.proc.poll() is None):
block = self.proc.stdout.read(4096)
@@ -125,6 +127,8 @@ def wait_until_ready(self):
return out
def issue_command(self, line):
+ assert self.proc is not None
+ assert self.proc.stdin is not None
line = line.encode('utf-8') + b"\n"
if self.verbose:
sys.stdout.buffer.write(line)
@@ -158,6 +162,7 @@ def select_frame(self, func_name):
m = re.search(pat, out)
if m is None:
pytest.fail(f"Could not select frame for function {func_name}")
+ return # Never reached, but helps type checker
frame_num = int(m[1])
out = self.run_command(f"frame {frame_num}")
@@ -165,6 +170,8 @@ def select_frame(self, func_name):
def join(self):
if self.proc is not None:
+ assert self.proc.stdin is not None
+ assert self.proc.stdout is not None
self.proc.stdin.close()
self.proc.stdout.close() # avoid ResourceWarning
self.proc.kill()
diff --git a/python/pyarrow/tests/test_jvm.py b/python/pyarrow/tests/test_jvm.py
index d2ba780efc7f..b5d4e74f126f 100644
--- a/python/pyarrow/tests/test_jvm.py
+++ b/python/pyarrow/tests/test_jvm.py
@@ -38,11 +38,13 @@ def root_allocator():
arrow_dir = os.path.join(os.path.dirname(__file__), '..', '..', '..')
pom_path = os.path.join(arrow_dir, 'java', 'pom.xml')
tree = ET.parse(pom_path)
- version = tree.getroot().find(
+ version_element = tree.getroot().find(
'POM:version',
namespaces={
'POM': 'http://maven.apache.org/POM/4.0.0'
- }).text
+ })
+ assert version_element is not None
+ version = version_element.text
jar_path = os.path.join(
arrow_dir, 'java', 'tools', 'target',
f'arrow-tools-{version}-jar-with-dependencies.jar')
@@ -76,8 +78,8 @@ def test_jvm_buffer(root_allocator):
def test_jvm_buffer_released(root_allocator):
- import jpype.imports # noqa
- from java.lang import IllegalArgumentException
+ import jpype.imports # type: ignore[import-untyped, import-not-found] # noqa
+ from java.lang import IllegalArgumentException # type: ignore[import-not-found]
jvm_buffer = root_allocator.buffer(8)
jvm_buffer.release()
diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py
index 65f0c6081363..20a33a382e41 100644
--- a/python/pyarrow/tests/test_scalars.py
+++ b/python/pyarrow/tests/test_scalars.py
@@ -20,11 +20,12 @@
import pytest
import weakref
from collections.abc import Sequence, Mapping
+from typing import cast
try:
import numpy as np
except ImportError:
- np = None
+ pass
import pyarrow as pa
import pyarrow.compute as pc
@@ -68,7 +69,7 @@
pa.Time32Scalar),
(datetime.datetime.now().time(), None, pa.Time64Scalar),
(datetime.timedelta(days=1), None, pa.DurationScalar),
- (pa.MonthDayNano([1, -1, -10100]), None,
+ (pa.MonthDayNano([1, -1, -10100]), None, # type: ignore[call-arg, arg-type]
pa.MonthDayNanoIntervalScalar),
({'a': 1, 'b': [1, 2]}, None, pa.StructScalar),
([('a', 1), ('b', 2)], pa.map_(pa.string(), pa.int8()), pa.MapScalar),
@@ -360,7 +361,8 @@ def test_time_from_datetime_time():
def test_temporal_values(value, time_type: pa.DataType):
time_scalar = pa.scalar(value, type=time_type)
time_scalar.validate(full=True)
- assert time_scalar.value == value
+ assert (time_scalar.value # type: ignore[union-attr, reportAttributeAccessIssue]
+ == value)
def test_cast():
@@ -422,7 +424,9 @@ def test_timestamp():
expected = pd.Timestamp('2000-01-01 12:34:56')
assert arrow_arr[0].as_py() == expected
- assert arrow_arr[0].value * 1000**i == expected.value
+ value = cast(pa.TimestampScalar, arrow_arr[0]).value
+ assert value is not None
+ assert value * 1000**i == expected.value
tz = 'America/New_York'
arrow_type = pa.timestamp(unit, tz=tz)
@@ -434,7 +438,9 @@ def test_timestamp():
.tz_convert(tz))
assert arrow_arr[0].as_py() == expected
- assert arrow_arr[0].value * 1000**i == expected.value
+ value = cast(pa.TimestampScalar, arrow_arr[0]).value
+ assert value is not None
+ assert value * 1000**i == expected.value
@pytest.mark.nopandas
@@ -529,7 +535,7 @@ def test_duration_nanos_nopandas():
def test_month_day_nano_interval():
- triple = pa.MonthDayNano([-3600, 1800, -50])
+ triple = pa.MonthDayNano([-3600, 1800, -50]) # type: ignore[invalid-argument-type]
arr = pa.array([triple])
assert isinstance(arr[0].as_py(), pa.MonthDayNano)
assert arr[0].as_py() == triple
@@ -577,7 +583,7 @@ def test_binary(value, ty, scalar_typ):
with pytest.raises(ValueError):
memoryview(s)
else:
- assert buf.to_pybytes() == value
+ assert buf.to_pybytes() == value # type: ignore[union-attr]
assert isinstance(buf, pa.Buffer)
assert bytes(s) == value
@@ -852,7 +858,7 @@ def test_dictionary(pickle_module):
assert arr.to_pylist() == expected
for j, (i, v) in enumerate(zip(indices, expected)):
- s = arr[j]
+ s = cast(pa.DictionaryScalar, arr[j])
assert s.as_py() == v
assert s.value.as_py() == v
@@ -868,14 +874,14 @@ def test_run_end_encoded():
values = [1, 2, 1, None, 3]
arr = pa.RunEndEncodedArray.from_arrays(run_ends, values)
- scalar = arr[0]
+ scalar = cast(pa.RunEndEncodedScalar, arr[0])
assert isinstance(scalar, pa.RunEndEncodedScalar)
assert isinstance(scalar.value, pa.Int64Scalar)
assert scalar.value == pa.array(values)[0]
assert scalar.as_py() == 1
# null -> .value is still a scalar, as_py returns None
- scalar = arr[10]
+ scalar = cast(pa.RunEndEncodedScalar, arr[10])
assert isinstance(scalar.value, pa.Int64Scalar)
assert scalar.as_py() is None
@@ -901,13 +907,13 @@ def test_union(pickle_module):
with pytest.raises(pa.ArrowNotImplementedError):
pickle_module.loads(pickle_module.dumps(s))
- assert arr[0].type_code == 0
+ assert cast(pa.UnionScalar, arr[0]).type_code == 0
assert arr[0].as_py() == "a"
- assert arr[1].type_code == 0
+ assert cast(pa.UnionScalar, arr[1]).type_code == 0
assert arr[1].as_py() == "b"
- assert arr[2].type_code == 1
+ assert cast(pa.UnionScalar, arr[2]).type_code == 1
assert arr[2].as_py() == 3
- assert arr[3].type_code == 1
+ assert cast(pa.UnionScalar, arr[3]).type_code == 1
assert arr[3].as_py() == 4
# dense
@@ -927,9 +933,9 @@ def test_union(pickle_module):
with pytest.raises(pa.ArrowNotImplementedError):
pickle_module.loads(pickle_module.dumps(s))
- assert arr[0].type_code == 0
+ assert cast(pa.UnionScalar, arr[0]).type_code == 0
assert arr[0].as_py() == b'a'
- assert arr[5].type_code == 1
+ assert cast(pa.UnionScalar, arr[5]).type_code == 1
assert arr[5].as_py() == 3
diff --git a/python/pyarrow/tests/test_strategies.py b/python/pyarrow/tests/test_strategies.py
index babb839b534e..9505b9a11b04 100644
--- a/python/pyarrow/tests/test_strategies.py
+++ b/python/pyarrow/tests/test_strategies.py
@@ -25,7 +25,7 @@
@h.given(past.all_types)
def test_types(ty):
- assert isinstance(ty, pa.lib.DataType)
+ assert isinstance(ty, pa.DataType)
@h.given(past.all_fields)
@@ -41,7 +41,7 @@ def test_schemas(schema):
@pytest.mark.numpy
@h.given(past.all_arrays)
def test_arrays(array):
- assert isinstance(array, pa.lib.Array)
+ assert isinstance(array, pa.Array)
@pytest.mark.numpy
diff --git a/python/pyarrow/tests/test_without_numpy.py b/python/pyarrow/tests/test_without_numpy.py
index 55c12602ce89..c5f5671aabc8 100644
--- a/python/pyarrow/tests/test_without_numpy.py
+++ b/python/pyarrow/tests/test_without_numpy.py
@@ -50,6 +50,7 @@ def test_tensor_to_np():
arr = [[1, 2, 3, 4], [10, 20, 30, 40], [100, 200, 300, 400]]
storage = pa.array(arr, pa.list_(pa.int32(), 4))
tensor_array = pa.ExtensionArray.from_storage(tensor_type, storage)
+ assert isinstance(tensor_array, pa.FixedShapeTensorArray)
tensor = tensor_array.to_tensor()
msg = "Cannot return a numpy.ndarray if NumPy is not present"
diff --git a/python/pyarrow/tests/util.py b/python/pyarrow/tests/util.py
index 7e3dd4324e93..fca0fec1122a 100644
--- a/python/pyarrow/tests/util.py
+++ b/python/pyarrow/tests/util.py
@@ -171,7 +171,8 @@ def get_modified_env_with_pythonpath():
existing_pythonpath = env.get('PYTHONPATH', '')
module_path = os.path.abspath(
- os.path.dirname(os.path.dirname(pa.__file__)))
+ os.path.dirname(os.path.dirname( # type: ignore[no-matching-overload]
+ pa.__file__)))
if existing_pythonpath:
new_pythonpath = os.pathsep.join((module_path, existing_pythonpath))
@@ -336,6 +337,7 @@ def _ensure_minio_component_version(component, minimum_year):
stderr=subprocess.PIPE, encoding='utf-8') as proc:
if proc.wait(10) != 0:
return False
+ assert proc.stdout is not None
stdout = proc.stdout.read()
pattern = component + r' version RELEASE\.(\d+)-.*'
version_match = re.search(pattern, stdout)
@@ -367,6 +369,8 @@ def _run_mc_command(mcdir, *args):
cmd_str = ' '.join(full_args)
print(f'Cmd: {cmd_str}')
print(f' Return: {retval}')
+ assert proc.stdout is not None
+ assert proc.stderr is not None
print(f' Stdout: {proc.stdout.read()}')
print(f' Stderr: {proc.stderr.read()}')
if retval != 0:
diff --git a/python/pyarrow/vendored/docscrape.py b/python/pyarrow/vendored/docscrape.py
index 6c4d6e01400b..47aeeed40aed 100644
--- a/python/pyarrow/vendored/docscrape.py
+++ b/python/pyarrow/vendored/docscrape.py
@@ -18,7 +18,7 @@
import sys
-def strip_blank_lines(l):
+def strip_blank_lines(l): # noqa: E741
"Remove leading and trailing blank lines from a list of lines"
while l and not l[0].strip():
del l[0]
@@ -62,7 +62,7 @@ def read(self):
return ''
def seek_next_non_empty_line(self):
- for l in self[self._l:]:
+ for l in self[self._l:]: # noqa: E741
if l.strip():
break
else:
@@ -185,8 +185,9 @@ def _is_at_section(self):
l2 = self._doc.peek(1).strip() # ---------- or ==========
if len(l2) >= 3 and (set(l2) in ({'-'}, {'='})) and len(l2) != len(l1):
snip = '\n'.join(self._doc._str[:2])+'...'
- self._error_location("potentially wrong underline length... \n%s \n%s in \n%s"
- % (l1, l2, snip), error=False)
+ self._error_location(
+ "potentially wrong underline length... \n%s \n%s in \n%s"
+ % (l1, l2, snip), error=False)
return l2.startswith('-'*len(l1)) or l2.startswith('='*len(l1))
def _strip(self, doc):
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 217dba81b873..19b2186e21ee 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -99,38 +99,26 @@ version_scheme = 'guess-next-dev'
git_describe_command = 'git describe --dirty --tags --long --match "apache-arrow-[0-9]*.*"'
fallback_version = '24.0.0a0'
-# TODO: Enable type checking once stubs are merged
[tool.mypy]
-files = ["pyarrow-stubs"]
+files = ["pyarrow", "pyarrow-stubs"]
mypy_path = "$MYPY_CONFIG_FILE_DIR/pyarrow-stubs"
-exclude = [
- "^pyarrow/",
- "^benchmarks/",
- "^examples/",
- "^scripts/",
-]
+exclude = 'pyarrow/interchange/.*|pyarrow/tests/interchange/.*|pyarrow/vendored/.*|pyarrow/tests/test_cuda.*'  # mypy reads this as a regex, not a glob
-# TODO: Enable type checking once stubs are merged
[tool.pyright]
pythonPlatform = "All"
pythonVersion = "3.10"
-include = ["pyarrow-stubs"]
-exclude = [
- "pyarrow",
- "benchmarks",
- "examples",
- "scripts",
- "build",
-]
+include = ["pyarrow", "pyarrow-stubs"]
+exclude = ["pyarrow/vendored", "pyarrow/interchange", "pyarrow/tests/interchange", "pyarrow/tests/test_cuda*"]
stubPath = "pyarrow-stubs"
typeCheckingMode = "basic"
-# TODO: Enable type checking once stubs are merged
[tool.ty.src]
-include = ["pyarrow-stubs"]
-exclude = [
- "pyarrow",
- "benchmarks",
- "examples",
- "scripts",
-]
+include = ["pyarrow", "pyarrow-stubs"]
+exclude = ["pyarrow/vendored", "pyarrow/interchange", "pyarrow/tests/interchange", "pyarrow/tests/test_cuda*"]
+
+[tool.ty.environment]
+root = ["pyarrow"]
+
+[tool.ty.rules]
+unresolved-import = "ignore"
+unresolved-attribute = "ignore"
diff --git a/python/scripts/run_emscripten_tests.py b/python/scripts/run_emscripten_tests.py
index 406dfc54e4fc..e54f0c223ab4 100644
--- a/python/scripts/run_emscripten_tests.py
+++ b/python/scripts/run_emscripten_tests.py
@@ -114,7 +114,7 @@ def end_headers(self):
def run_server_thread(dist_dir, q):
- global _SERVER_ADDRESS
+ global _SERVER_ADDRESS # noqa: F824
os.chdir(dist_dir)
server = http.server.HTTPServer(("", 0), TemplateOverrider)
q.put(server.server_address)