diff --git a/python/pyarrow-stubs/pyarrow/_csv.pyi b/python/pyarrow-stubs/pyarrow/_csv.pyi
new file mode 100644
index 000000000000..6c911a8b0c1d
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/_csv.pyi
@@ -0,0 +1,132 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from collections.abc import Callable, Sequence
+from dataclasses import dataclass, field
+from typing import IO, Any, Literal
+
+from _typeshed import StrPath
+
+from . import lib
+
+
+@dataclass(kw_only=True)
+class ReadOptions(lib._Weakrefable):
+    use_threads: bool = field(default=True, kw_only=False)  # noqa: Y015
+    block_size: int | float | None = None
+    skip_rows: int = 0
+    skip_rows_after_names: int = 0
+    column_names: Sequence[str] | None = None
+    autogenerate_column_names: bool = False
+    encoding: str = "utf8"
+    def validate(self) -> None: ...
+
+
+@dataclass(kw_only=True)
+class ParseOptions(lib._Weakrefable):
+    delimiter: str = field(default=",", kw_only=False)  # noqa: Y015
+    quote_char: str | Literal[False] = '"'
+    double_quote: bool = True
+    escape_char: str | Literal[False] = False
+    newlines_in_values: bool = False
+    ignore_empty_lines: bool = True
+    invalid_row_handler: Callable[[InvalidRow], str] | None = None
+
+    def validate(self) -> None: ...
+
+
+@dataclass(kw_only=True)
+class ConvertOptions(lib._Weakrefable):
+    check_utf8: bool = field(default=True, kw_only=False)  # noqa: Y015
+    column_types: lib.Schema | dict | Sequence[tuple[str, lib.DataType]] | None = None
+    null_values: list[str] | None = None
+    true_values: list[str] | None = None
+    false_values: list[str] | None = None
+    decimal_point: str = "."
+    strings_can_be_null: bool = False
+    quoted_strings_can_be_null: bool = True
+    include_columns: list[str] | None = None
+    include_missing_columns: bool = False
+    auto_dict_encode: bool = False
+    auto_dict_max_cardinality: int | None = None
+    timestamp_parsers: Sequence[str | lib._Weakrefable] | None = None
+
+    def validate(self) -> None: ...
+
+
+@dataclass(kw_only=True)
+class WriteOptions(lib._Weakrefable):
+    include_header: bool = field(default=True, kw_only=False)  # noqa: Y015
+    batch_size: int = 1024
+    delimiter: str = ","
+    quoting_style: Literal["needed", "all_valid", "none"] = "needed"
+    quoting_header: Literal["needed", "all_valid", "none"] = "needed"
+
+    def validate(self) -> None: ...
+
+
+@dataclass
+class InvalidRow(lib._Weakrefable):
+    expected_columns: int
+    actual_columns: int
+    number: int | None
+    text: str
+
+
+class CSVWriter(lib._CRecordBatchWriter):
+    def __init__(
+        self,
+        # TODO: OutputStream
+        sink: StrPath | IO[Any],
+        schema: lib.Schema,
+        write_options: WriteOptions | None = None,
+        *,
+        memory_pool: lib.MemoryPool | None = None,
+    ) -> None: ...
+
+
+class CSVStreamingReader(lib.RecordBatchReader):
+    ...
+
+
+ISO8601: lib._Weakrefable
+
+
+def open_csv(
+    input_file: StrPath | IO[Any],
+    read_options: ReadOptions | None = None,
+    parse_options: ParseOptions | None = None,
+    convert_options: ConvertOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> CSVStreamingReader: ...
+
+
+def read_csv(
+    input_file: StrPath | IO[Any],
+    read_options: ReadOptions | None = None,
+    parse_options: ParseOptions | None = None,
+    convert_options: ConvertOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.Table: ...
+
+
+def write_csv(
+    data: lib.RecordBatch | lib.Table,
+    output_file: StrPath | lib.NativeFile | IO[Any],
+    write_options: WriteOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> None: ...
diff --git a/python/pyarrow-stubs/pyarrow/_feather.pyi b/python/pyarrow-stubs/pyarrow/_feather.pyi
new file mode 100644
index 000000000000..2f4757cd5f1a
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/_feather.pyi
@@ -0,0 +1,51 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import IO, Literal
+from collections.abc import Sequence
+
+from _typeshed import StrPath
+
+from .lib import Buffer, NativeFile, Table, _Weakrefable
+
+
+class FeatherError(Exception):
+    ...
+
+
+def write_feather(
+    table: Table,
+    dest: StrPath | IO | NativeFile,
+    compression: str | None = None,
+    compression_level: int | None = None,
+    chunksize: int | None = None,
+    version: Literal[1, 2] = 2,
+): ...
+
+
+class FeatherReader(_Weakrefable):
+    def __init__(
+        self,
+        source: StrPath | IO | NativeFile | Buffer,
+        use_memory_map: bool,
+        use_threads: bool,
+    ) -> None: ...
+    @property
+    def version(self) -> str: ...
+    def read(self) -> Table: ...
+    def read_indices(self, indices: Sequence[int]) -> Table: ...
+    def read_names(self, names: Sequence[str]) -> Table: ...
diff --git a/python/pyarrow-stubs/pyarrow/_ipc.pyi b/python/pyarrow-stubs/pyarrow/_ipc.pyi
new file mode 100644
index 000000000000..5a87f2439046
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/_ipc.pyi
@@ -0,0 +1,317 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import enum
+import sys
+
+from io import IOBase
+
+if sys.version_info >= (3, 11):
+    from typing import Self
+else:
+    from typing_extensions import Self
+
+from collections.abc import Iterable, Iterator, Mapping
+from typing import Any, Literal, NamedTuple
+
+import pandas as pd
+
+from pyarrow._stubs_typing import SupportPyBuffer
+from pyarrow.lib import MemoryPool, RecordBatch, Schema, Table, Tensor, _Weakrefable
+
+from .io import Buffer, Codec, NativeFile, BufferReader
+from ._types import DictionaryMemo, KeyValueMetadata
+
+
+class MetadataVersion(enum.IntEnum):
+    V1 = enum.auto()
+    V2 = enum.auto()
+    V3 = enum.auto()
+    V4 = enum.auto()
+    V5 = enum.auto()
+
+
+class Alignment(enum.IntEnum):
+    Any = enum.auto()
+    At64Byte = enum.auto()
+    DataTypeSpecific = enum.auto()
+
+
+class WriteStats(NamedTuple):
+    num_messages: int
+    num_record_batches: int
+    num_dictionary_batches: int
+    num_dictionary_deltas: int
+    num_replaced_dictionaries: int
+
+
+class ReadStats(NamedTuple):
+    num_messages: int
+    num_record_batches: int
+    num_dictionary_batches: int
+    num_dictionary_deltas: int
+    num_replaced_dictionaries: int
+
+
+class IpcReadOptions(_Weakrefable):
+    ensure_native_endian: bool
+    use_threads: bool
+    ensure_alignment: Alignment
+    included_fields: list[int] | None
+
+    def __init__(
+        self,
+        *,
+        ensure_native_endian: bool = True,
+        use_threads: bool = True,
+        ensure_alignment: Alignment = ...,
+        included_fields: list[int] | None = None,
+    ) -> None: ...
+
+
+class IpcWriteOptions(_Weakrefable):
+    metadata_version: Any
+    allow_64bit: bool
+    use_legacy_format: bool
+    compression: Any
+    use_threads: bool
+    emit_dictionary_deltas: bool
+    unify_dictionaries: bool
+
+    def __init__(
+        self,
+        *,
+        metadata_version: MetadataVersion = MetadataVersion.V5,
+        allow_64bit: bool = False,
+        use_legacy_format: bool = False,
+        compression: Codec | Literal["lz4", "zstd"] | None = None,
+        use_threads: bool = True,
+        emit_dictionary_deltas: bool = False,
+        unify_dictionaries: bool = False,
+    ) -> None: ...
+
+
+class Message(_Weakrefable):
+    @property
+    def type(self) -> str: ...
+    @property
+    def metadata(self) -> Buffer: ...
+    @property
+    def metadata_version(self) -> MetadataVersion: ...
+    @property
+    def body(self) -> Buffer | None: ...
+    def equals(self, other: Message) -> bool: ...
+
+    def serialize_to(self, sink: NativeFile, alignment: int = 8,
+                     memory_pool: MemoryPool | None = None): ...
+
+    def serialize(self, alignment: int = 8, memory_pool: MemoryPool |
+                  None = None) -> Buffer: ...
+
+
+class MessageReader(_Weakrefable):
+    @classmethod
+    def open_stream(cls, source: bytes | NativeFile |
+                    IOBase | SupportPyBuffer) -> Self: ...
+
+    def __iter__(self) -> Self: ...
+    def read_next_message(self) -> Message: ...
+
+    __next__ = read_next_message
+
+# ----------------------------------------------------------------------
+# File and stream readers and writers
+
+
+class _CRecordBatchWriter(_Weakrefable):
+    def write(self, table_or_batch: Table | RecordBatch): ...
+
+    def write_batch(
+        self,
+        batch: RecordBatch,
+        custom_metadata: Mapping[bytes, bytes] | KeyValueMetadata | None = None,
+    ): ...
+
+    def write_table(self, table: Table, max_chunksize: int | None = None) -> None: ...
+
+    def close(self) -> None: ...
+
+    def __enter__(self) -> Self: ...
+    def __exit__(self, exc_type, exc_val, exc_tb): ...
+    @property
+    def stats(self) -> WriteStats: ...
+
+
+class _RecordBatchStreamWriter(_CRecordBatchWriter):
+    @property
+    def _use_legacy_format(self) -> bool: ...
+    @property
+    def _metadata_version(self) -> MetadataVersion: ...
+
+    def _open(
+        self,
+        sink,
+        schema: Schema,
+        options: IpcWriteOptions = IpcWriteOptions(),  # noqa: Y011
+        metadata: dict[bytes, bytes] | None = None,
+    ): ...
+
+
+class _ReadPandasMixin:
+    def read_pandas(self, **options) -> pd.DataFrame: ...
+
+
+class RecordBatchReader(_ReadPandasMixin, _Weakrefable):
+    def __iter__(self) -> Self: ...
+    def read_next_batch(self) -> RecordBatch: ...
+
+    __next__ = read_next_batch
+    @property
+    def schema(self) -> Schema: ...
+
+    def read_next_batch_with_custom_metadata(self) -> RecordBatchWithMetadata: ...
+
+    def iter_batches_with_custom_metadata(
+        self,
+    ) -> Iterator[RecordBatchWithMetadata]: ...
+
+    def read_all(self) -> Table: ...
+
+    def close(self) -> None: ...
+
+    def __enter__(self) -> Self: ...
+    def __exit__(self, exc_type, exc_val, exc_tb): ...
+    def cast(self, target_schema: Schema) -> Self: ...
+
+    def _export_to_c(self, out_ptr: int) -> None: ...
+
+    @classmethod
+    def _import_from_c(cls, in_ptr: int) -> Self: ...
+
+    def __arrow_c_stream__(self, requested_schema=None): ...
+
+    @classmethod
+    def _import_from_c_capsule(cls, stream) -> Self: ...
+
+    @classmethod
+    def from_stream(cls, data: Any,
+                    schema: Any = None) -> Self: ...
+
+    @classmethod
+    def from_batches(cls, schema: Any, batches: Iterable[RecordBatch]) -> Self: ...
+
+
+class _RecordBatchStreamReader(RecordBatchReader):
+    @property
+    def stats(self) -> ReadStats: ...
+
+    def _open(
+        self,
+        source,
+        options: IpcReadOptions | None = None,
+        memory_pool: MemoryPool | None = None,
+    ) -> Self: ...
+
+
+class _RecordBatchFileWriter(_RecordBatchStreamWriter):
+    ...
+
+
+class RecordBatchWithMetadata(NamedTuple):
+    batch: RecordBatch
+    custom_metadata: KeyValueMetadata
+
+
+class _RecordBatchFileReader(_ReadPandasMixin, _Weakrefable):
+    @property
+    def num_record_batches(self) -> int: ...
+
+    def get_batch(self, i: int) -> RecordBatch: ...
+
+    get_record_batch = get_batch
+    def get_batch_with_custom_metadata(self, i: int) -> RecordBatchWithMetadata: ...
+
+    def read_all(self) -> Table: ...
+
+    def __enter__(self) -> Self: ...
+    def __exit__(self, exc_type, exc_val, exc_tb): ...
+    @property
+    def schema(self) -> Schema: ...
+    @property
+    def stats(self) -> ReadStats: ...
+    @property
+    def metadata(self) -> KeyValueMetadata | None: ...
+
+    def _open(
+        self,
+        source,
+        footer_offset: int | None = None,
+        options: IpcReadOptions | None = None,
+        memory_pool: MemoryPool | None = None,
+    ) -> Self: ...
+
+
+def get_tensor_size(tensor: Tensor) -> int: ...
+
+
+def get_record_batch_size(batch: RecordBatch) -> int: ...
+
+
+def write_tensor(tensor: Tensor, dest: NativeFile) -> int: ...
+
+
+def read_tensor(source: NativeFile) -> Tensor: ...
+
+
+def read_message(source: NativeFile | IOBase | SupportPyBuffer) -> Message: ...
+
+
+def read_schema(obj: Buffer | Message | BufferReader, dictionary_memo: DictionaryMemo |
+                None = None) -> Schema: ...
+
+
+def read_record_batch(
+    obj: Message | SupportPyBuffer,
+    schema: Schema,
+    dictionary_memo: DictionaryMemo | None = None) -> RecordBatch: ...
+
+
+__all__ = [
+    "MetadataVersion",
+    "Alignment",
+    "WriteStats",
+    "ReadStats",
+    "IpcReadOptions",
+    "IpcWriteOptions",
+    "Message",
+    "MessageReader",
+    "_CRecordBatchWriter",
+    "_RecordBatchStreamWriter",
+    "_ReadPandasMixin",
+    "RecordBatchReader",
+    "_RecordBatchStreamReader",
+    "_RecordBatchFileWriter",
+    "RecordBatchWithMetadata",
+    "_RecordBatchFileReader",
+    "get_tensor_size",
+    "get_record_batch_size",
+    "write_tensor",
+    "read_tensor",
+    "read_message",
+    "read_schema",
+    "read_record_batch",
+]
diff --git a/python/pyarrow-stubs/pyarrow/_json.pyi b/python/pyarrow-stubs/pyarrow/_json.pyi
new file mode 100644
index 000000000000..bae2ff404f09
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/_json.pyi
@@ -0,0 +1,66 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import IO, Any, Literal
+
+from _typeshed import StrPath
+
+from .lib import MemoryPool, RecordBatchReader, Schema, Table, _Weakrefable
+
+
+class ReadOptions(_Weakrefable):
+    use_threads: bool
+    block_size: int
+
+    def __init__(self, use_threads: bool | None = None,
+                 block_size: int | None = None): ...
+
+    def equals(self, other: ReadOptions) -> bool: ...
+
+
+class ParseOptions(_Weakrefable):
+    explicit_schema: Schema
+    newlines_in_values: bool
+    unexpected_field_behavior: Literal["ignore", "error", "infer"]
+
+    def __init__(
+        self,
+        explicit_schema: Schema | None = None,
+        newlines_in_values: bool | None = None,
+        unexpected_field_behavior: Literal["ignore", "error", "infer"] = "infer",
+    ): ...
+    def equals(self, other: ParseOptions) -> bool: ...
+
+
+class JSONStreamingReader(RecordBatchReader):
+    ...
+
+
+def read_json(
+    input_file: StrPath | IO[Any],
+    read_options: ReadOptions | None = None,
+    parse_options: ParseOptions | None = None,
+    memory_pool: MemoryPool | None = None,
+) -> Table: ...
+
+
+def open_json(
+    input_file: StrPath | IO[Any],
+    read_options: ReadOptions | None = None,
+    parse_options: ParseOptions | None = None,
+    memory_pool: MemoryPool | None = None,
+) -> JSONStreamingReader: ...
diff --git a/python/pyarrow-stubs/pyarrow/_orc.pyi b/python/pyarrow-stubs/pyarrow/_orc.pyi
new file mode 100644
index 000000000000..faa0f57c1fdc
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/_orc.pyi
@@ -0,0 +1,77 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import IO, Literal, Any
+
+from .lib import (
+    Buffer,
+    KeyValueMetadata,
+    MemoryPool,
+    NativeFile,
+    RecordBatch,
+    Schema,
+    Table,
+    _Weakrefable,
+)
+
+
+class ORCReader(_Weakrefable):
+    def __init__(self, memory_pool: MemoryPool | None = None) -> None: ...
+    def open(self, source: str | NativeFile | Buffer, use_memory_map: bool = True): ...
+    def metadata(self) -> KeyValueMetadata: ...
+    def schema(self) -> Schema: ...
+    def nrows(self) -> int: ...
+    def nstripes(self) -> int: ...
+    def file_version(self) -> str: ...
+    def software_version(self) -> str: ...
+    def compression(self) -> Literal["UNCOMPRESSED",
+                                     "ZLIB", "SNAPPY", "LZ4", "ZSTD"]: ...
+
+    def compression_size(self) -> int: ...
+    def row_index_stride(self) -> int: ...
+    def writer(self) -> str: ...
+    def writer_version(self) -> str: ...
+    def nstripe_statistics(self) -> int: ...
+    def content_length(self) -> int: ...
+    def stripe_statistics_length(self) -> int: ...
+    def file_footer_length(self) -> int: ...
+    def file_postscript_length(self) -> int: ...
+    def file_length(self) -> int: ...
+    def serialized_file_tail(self) -> int: ...
+    def read_stripe(self, n: int, columns: list[str] | None = None) -> RecordBatch: ...
+    def read(self, columns: list[str] | None = None) -> Table: ...
+
+
+class ORCWriter(_Weakrefable):
+    def open(
+        self,
+        where: str | NativeFile | IO,
+        *,
+        file_version: str | None = None,
+        batch_size: int | None = None,
+        stripe_size: int | None = None,
+        compression: Any = 'UNCOMPRESSED',
+        compression_block_size: int | None = None,
+        compression_strategy: Any = 'SPEED',
+        row_index_stride: int | None = None,
+        padding_tolerance: float | None = None,
+        dictionary_key_size_threshold: float | None = None,
+        bloom_filter_columns: list[int] | None = None,
+        bloom_filter_fpp: float | None = None,
+    ) -> None: ...
+    def write(self, table: Table) -> None: ...
+    def close(self) -> None: ...
diff --git a/python/pyarrow-stubs/pyarrow/csv.pyi b/python/pyarrow-stubs/pyarrow/csv.pyi
new file mode 100644
index 000000000000..a7abd413aab4
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/csv.pyi
@@ -0,0 +1,44 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow._csv import (
+    ISO8601,
+    ConvertOptions,
+    CSVStreamingReader,
+    CSVWriter,
+    InvalidRow,
+    ParseOptions,
+    ReadOptions,
+    WriteOptions,
+    open_csv,
+    read_csv,
+    write_csv,
+)
+
+__all__ = [
+    "ISO8601",
+    "ConvertOptions",
+    "CSVStreamingReader",
+    "CSVWriter",
+    "InvalidRow",
+    "ParseOptions",
+    "ReadOptions",
+    "WriteOptions",
+    "open_csv",
+    "read_csv",
+    "write_csv",
+]
diff --git a/python/pyarrow-stubs/pyarrow/feather.pyi b/python/pyarrow-stubs/pyarrow/feather.pyi
new file mode 100644
index 000000000000..cf9d34020913
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/feather.pyi
@@ -0,0 +1,81 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from collections.abc import Iterable
+from typing import IO, Literal
+
+import pandas as pd
+
+from pyarrow import lib
+from pyarrow.lib import Table
+from pyarrow._typing import StrPath
+from ._feather import FeatherError
+
+
+class FeatherDataset:
+    path_or_paths: str | list[str]
+    validate_schema: bool
+
+    def __init__(self, path_or_paths: str |
+                 list[str], validate_schema: bool = True) -> None: ...
+
+    def read_table(self, columns: list[str] | None = None) -> Table: ...
+    def validate_schemas(self, piece, table: Table) -> None: ...
+
+    def read_pandas(
+        self, columns: list[str] | None = None, use_threads: bool = True
+    ) -> pd.DataFrame: ...
+
+
+def check_chunked_overflow(name: str, col) -> None: ...
+
+
+def write_feather(
+    df: pd.DataFrame | Table | lib.ChunkedArray,
+    dest: StrPath | IO,
+    compression: Literal["zstd", "lz4", "uncompressed", "snappy"] | None = None,
+    compression_level: int | None = None,
+    chunksize: int | None = None,
+    version: Literal[1, 2] = 2,
+) -> None: ...
+
+
+def read_feather(
+    source: StrPath | IO | lib.NativeFile,
+    columns: list[str] | None = None,
+    use_threads: bool = True,
+    memory_map: bool = False,
+    **kwargs,
+) -> pd.DataFrame: ...
+
+
+def read_table(
+    source: StrPath | IO | lib.NativeFile,
+    columns: list[str | int] | Iterable[str | int] | None = None,
+    memory_map: bool = False,
+    use_threads: bool = True,
+) -> Table: ...
+
+
+__all__ = [
+    "FeatherError",
+    "FeatherDataset",
+    "check_chunked_overflow",
+    "write_feather",
+    "read_feather",
+    "read_table",
+]
diff --git a/python/pyarrow-stubs/pyarrow/ipc.pyi b/python/pyarrow-stubs/pyarrow/ipc.pyi
new file mode 100644
index 000000000000..d153ab0f46aa
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/ipc.pyi
@@ -0,0 +1,162 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from io import IOBase
+from typing import Any
+
+from _typeshed import StrPath
+import pandas as pd
+import pyarrow.lib as lib
+
+from pyarrow.lib import (
+    Alignment,
+    IpcReadOptions,
+    IpcWriteOptions,
+    Message,
+    MessageReader,
+    MetadataVersion,
+    ReadStats,
+    RecordBatchReader,
+    WriteStats,
+    _ReadPandasMixin,
+    get_record_batch_size,
+    get_tensor_size,
+    read_message,
+    read_record_batch,
+    read_schema,
+    read_tensor,
+    write_tensor,
+)
+
+
+class RecordBatchStreamReader(lib._RecordBatchStreamReader):
+    def __init__(
+        self,
+        source: bytes | lib.Buffer | lib.NativeFile | IOBase,
+        *,
+        options: IpcReadOptions | None = None,
+        memory_pool: lib.MemoryPool | None = None,
+    ) -> None: ...
+
+
+class RecordBatchStreamWriter(lib._RecordBatchStreamWriter):
+    def __init__(
+        self,
+        sink: str | lib.NativeFile | IOBase,
+        schema: lib.Schema,
+        *,
+        use_legacy_format: bool | None = None,
+        options: IpcWriteOptions | None = None,
+    ) -> None: ...
+
+
+class RecordBatchFileReader(lib._RecordBatchFileReader):
+    def __init__(
+        self,
+        source: bytes | lib.Buffer | lib.NativeFile | IOBase,
+        footer_offset: int | None = None,
+        *,
+        options: IpcReadOptions | None = None,
+        memory_pool: lib.MemoryPool | None = None,
+    ) -> None: ...
+
+
+class RecordBatchFileWriter(lib._RecordBatchFileWriter):
+    def __init__(
+        self,
+        sink: str | lib.NativeFile | IOBase,
+        schema: lib.Schema,
+        *,
+        use_legacy_format: bool | None = None,
+        options: IpcWriteOptions | None = None,
+    ) -> None: ...
+
+
+def new_stream(
+    sink: str | lib.NativeFile | IOBase,
+    schema: lib.Schema,
+    *,
+    use_legacy_format: bool | None = None,
+    options: IpcWriteOptions | None = None,
+) -> RecordBatchStreamWriter: ...
+
+
+def open_stream(
+    source: bytes | int | lib.Buffer | lib.NativeFile | IOBase,
+    *,
+    options: Any = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> RecordBatchStreamReader: ...
+
+
+def new_file(
+    sink: str | lib.NativeFile | IOBase,
+    schema: lib.Schema,
+    *,
+    use_legacy_format: bool | None = None,
+    options: IpcWriteOptions | None = None,
+    metadata: lib.KeyValueMetadata | dict[bytes, bytes] | None = None,
+) -> RecordBatchFileWriter: ...
+
+
+def open_file(
+    source: StrPath | bytes | lib.Buffer | lib.NativeFile | IOBase,
+    footer_offset: int | None = None,
+    *,
+    options: Any = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> RecordBatchFileReader: ...
+
+
+def serialize_pandas(
+    df: pd.DataFrame, *, nthreads: int | None = None, preserve_index: bool | None = None
+) -> lib.Buffer: ...
+
+
+def deserialize_pandas(
+    buf: lib.Buffer, *, use_threads: bool = True) -> pd.DataFrame: ...
+
+
+__all__ = [
+    "Alignment",
+    "IpcReadOptions",
+    "IpcWriteOptions",
+    "Message",
+    "MessageReader",
+    "MetadataVersion",
+    "ReadStats",
+    "RecordBatchReader",
+    "WriteStats",
+    "_ReadPandasMixin",
+    "get_record_batch_size",
+    "get_tensor_size",
+    "read_message",
+    "read_record_batch",
+    "read_schema",
+    "read_tensor",
+    "write_tensor",
+    "RecordBatchStreamReader",
+    "RecordBatchStreamWriter",
+    "RecordBatchFileReader",
+    "RecordBatchFileWriter",
+    "new_stream",
+    "open_stream",
+    "new_file",
+    "open_file",
+    "serialize_pandas",
+    "deserialize_pandas",
+]
diff --git a/python/pyarrow-stubs/pyarrow/json.pyi b/python/pyarrow-stubs/pyarrow/json.pyi
new file mode 100644
index 000000000000..67768db42e43
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/json.pyi
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow._json import ParseOptions, ReadOptions, open_json, read_json
+
+__all__ = ["ParseOptions", "ReadOptions", "read_json", "open_json"]
diff --git a/python/pyarrow-stubs/pyarrow/orc.pyi b/python/pyarrow-stubs/pyarrow/orc.pyi
new file mode 100644
index 000000000000..f16350d0ffc9
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/orc.pyi
@@ -0,0 +1,146 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import sys
+
+if sys.version_info >= (3, 11):
+    from typing import Self
+else:
+    from typing_extensions import Self
+from typing import IO, Any, Literal
+
+from _typeshed import StrPath
+
+from . import _orc
+from ._fs import SupportedFileSystem
+from .lib import KeyValueMetadata, NativeFile, RecordBatch, Schema, Table
+
+
+class ORCFile:
+    reader: _orc.ORCReader
+    def __init__(self, source: StrPath | NativeFile | IO) -> None: ...
+    @property
+    def metadata(self) -> KeyValueMetadata: ...
+
+    @property
+    def schema(self) -> Schema: ...
+
+    @property
+    def nrows(self) -> int: ...
+
+    @property
+    def nstripes(self) -> int: ...
+
+    @property
+    def file_version(self) -> str: ...
+
+    @property
+    def software_version(self) -> str: ...
+
+    @property
+    def compression(self) -> Literal["UNCOMPRESSED",
+                                     "ZLIB", "SNAPPY", "LZ4", "ZSTD"]: ...
+
+    @property
+    def compression_size(self) -> int: ...
+
+    @property
+    def writer(self) -> str: ...
+
+    @property
+    def writer_version(self) -> str: ...
+
+    @property
+    def row_index_stride(self) -> int: ...
+
+    @property
+    def nstripe_statistics(self) -> int: ...
+
+    @property
+    def content_length(self) -> int: ...
+
+    @property
+    def stripe_statistics_length(self) -> int: ...
+
+    @property
+    def file_footer_length(self) -> int: ...
+
+    @property
+    def file_postscript_length(self) -> int: ...
+
+    @property
+    def file_length(self) -> int: ...
+
+    def read_stripe(
+        self, n: int, columns: list[str | int] | None = None
+    ) -> RecordBatch: ...
+
+    def read(self, columns: list[str | int] | None = None) -> Table: ...
+
+
+class ORCWriter:
+    writer: _orc.ORCWriter
+    is_open: bool
+
+    def __init__(
+        self,
+        where: StrPath | NativeFile | IO,
+        *,
+        file_version: Any = "0.12",
+        batch_size: Any = 1024,
+        stripe_size: Any = 64 * 1024 * 1024,  # noqa: Y011
+        compression: Any = "UNCOMPRESSED",
+        compression_block_size: Any = 65536,
+        compression_strategy: Any = "SPEED",
+        row_index_stride: Any = 10000,
+        padding_tolerance: Any = 0.0,
+        dictionary_key_size_threshold: Any = 0.0,
+        bloom_filter_columns: Any = None,
+        bloom_filter_fpp: Any = 0.05,
+    ): ...
+    def __enter__(self) -> Self: ...
+    def __exit__(self, *args, **kwargs) -> None: ...
+    def __getattr__(self, name: str) -> Any: ...
+    def write(self, table: Table) -> None: ...
+
+    def close(self) -> None: ...
+
+
+def read_table(
+    source: StrPath | NativeFile | IO,
+    columns: list[str | int] | None = None,
+    filesystem: SupportedFileSystem | str | None = None,
+) -> Table: ...
+
+
+# TODO: should not use Any here?
+def write_table(
+    table: Table,
+    where: StrPath | NativeFile | IO,
+    *,
+    file_version: Any = "0.12",
+    batch_size: Any = 1024,
+    stripe_size: Any = 64 * 1024 * 1024,  # noqa: Y011
+    compression: Any = 'UNCOMPRESSED',
+    compression_block_size: Any = 65536,
+    compression_strategy: Any = 'SPEED',
+    row_index_stride: Any = 10000,
+    padding_tolerance: Any = 0.0,
+    dictionary_key_size_threshold: Any = 0.0,
+    bloom_filter_columns: Any = None,
+    bloom_filter_fpp: Any = 0.05,
+) -> None: ...
diff --git a/python/pyarrow/feather.py b/python/pyarrow/feather.py
index 241c27706a6f..4b0ecb9f18e0 100644
--- a/python/pyarrow/feather.py
+++ b/python/pyarrow/feather.py
@@ -183,6 +183,7 @@ def write_feather(df, dest, compression=None, compression_level=None,
                              f'one of {_FEATHER_SUPPORTED_CODECS}')
 
     try:
+        assert version in (1, 2)
         _feather.write_feather(table, dest, compression=compression,
                                compression_level=compression_level,
                                chunksize=chunksize, version=version)
@@ -269,7 +270,7 @@ def read_table(source, columns=None, memory_map=False, use_threads=True):
                         f"Got columns {columns} of types {column_type_names}")
 
     # Feather v1 already respects the column selection
-    if reader.version < 3:
+    if int(reader.version) < 3:
         return table
     # Feather v2 reads with sorted / deduplicated selection
     elif sorted(set(columns)) == columns:
diff --git a/python/pyarrow/orc.py b/python/pyarrow/orc.py
index 4e0d66ec6659..222c289c8793 100644
--- a/python/pyarrow/orc.py
+++ b/python/pyarrow/orc.py
@@ -20,7 +20,7 @@
 import warnings
 
 from pyarrow.lib import Table
-import pyarrow._orc as _orc
+import pyarrow._orc as _orc  # type: ignore[reportMissingModuleSource]
 from pyarrow.fs import _resolve_filesystem_and_path
 
 
@@ -255,9 +255,11 @@ def __init__(self, where, *,
             file_version=file_version,
             batch_size=batch_size,
             stripe_size=stripe_size,
-            compression=compression,
+            compression=compression,  # type: ignore[reportArgumentType]
             compression_block_size=compression_block_size,
-            compression_strategy=compression_strategy,
+            compression_strategy=(
+                compression_strategy  # type: ignore[reportArgumentType]
+            ),
             row_index_stride=row_index_stride,
             padding_tolerance=padding_tolerance,
             dictionary_key_size_threshold=dictionary_key_size_threshold,
diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py
index dce605c7156d..2c271fa9b1bf 100644
--- a/python/pyarrow/tests/test_csv.py
+++ b/python/pyarrow/tests/test_csv.py
@@ -178,6 +178,7 @@ def test_read_options(pickle_module):
                                  encoding='utf16',
                                  skip_rows_after_names=27)
 
+    assert opts.block_size is not None
     assert opts.block_size > 0
     opts.block_size = 12345
     assert opts.block_size == 12345
@@ -302,6 +303,7 @@ def test_convert_options(pickle_module):
     with pytest.raises(ValueError):
         opts.decimal_point = '..'
 
+    assert opts.auto_dict_max_cardinality is not None
     assert opts.auto_dict_max_cardinality > 0
     opts.auto_dict_max_cardinality = 99999
     assert opts.auto_dict_max_cardinality == 99999
@@ -323,7 +325,7 @@ def test_convert_options(pickle_module):
     with pytest.raises(TypeError, match='DataType expected'):
         opts.column_types = {'a': None}
     with pytest.raises(TypeError):
-        opts.column_types = 0
+        opts.column_types = 0  # type: ignore[reportAttributeAccessIssue]
 
     assert isinstance(opts.null_values, list)
     assert '' in opts.null_values
@@ -1158,10 +1160,14 @@ def test_auto_dict_encode(self):
         table = self.read_bytes(rows, convert_options=opts,
                                 validate_full=False)
         assert table.schema == schema
-        dict_values = table['a'].chunk(0).dictionary
+        column_chunk = table.column('a').chunk(0)
+        assert isinstance(column_chunk, pa.DictionaryArray)
+        dict_values = column_chunk.dictionary
         assert len(dict_values) == 2
         assert dict_values[0].as_py() == "ab"
-        assert dict_values[1].as_buffer() == b"cd\xff"
+        dict_value = dict_values[1]
+        assert isinstance(dict_value, pa.StringScalar)
+        assert dict_value.as_buffer() == b"cd\xff"
 
         # With invalid UTF8, checked
         opts.check_utf8 = True
@@ -1502,7 +1508,7 @@ def signal_from_thread():
 
         # Interruption should have arrived timely
         assert last_duration <= 2.0
-        e = exc_info.__context__
+        e = exc_info.__context__  # type: ignore[possibly-missing-attribute, misc]
         assert isinstance(e, pa.ArrowCancelled)
         assert e.signum == signal.SIGINT
 
@@ -1866,6 +1872,9 @@ def use_threads(self):
 
 
 class BaseTestCompressedCSVRead:
+    def write_file(self, path, contents):
+        pass
+    csv_filename = ""
 
     def setUp(self):
         self.tmpdir = tempfile.mkdtemp(prefix='arrow-csv-test-')
@@ -1997,7 +2006,7 @@ def test_write_quoting_style():
             except Exception as e:
                 # This will trigger when we try to write a comma (,)
                 # without quotes, which is invalid
-                assert isinstance(e, res)
+                assert isinstance(e, res)  # type: ignore[invalid-argument-type]
                 break
         assert buf.getvalue() == res
         buf.seek(0)
diff --git a/python/pyarrow/tests/test_feather.py b/python/pyarrow/tests/test_feather.py
index 054bf920b269..a84b343b3dd2 100644
--- a/python/pyarrow/tests/test_feather.py
+++ b/python/pyarrow/tests/test_feather.py
@@ -26,7 +26,7 @@
 try:
     import numpy as np
 except ImportError:
-    np = None
+    pass
 
 import pyarrow as pa
 import pyarrow.tests.strategies as past
@@ -47,7 +47,7 @@ def datadir(base_datadir):
 
 
 def random_path(prefix='feather_'):
-    return tempfile.mktemp(prefix=prefix)
+    return tempfile.mktemp(prefix=prefix)  # type: ignore[deprecated]
 
 
 @pytest.fixture(scope="module", params=[1, 2])
@@ -63,7 +63,7 @@ def compression(request):
     yield request.param
 
 
-TEST_FILES = None
+TEST_FILES: list[str] | None = None
 
 
 def setup_module(module):
@@ -72,7 +72,7 @@ def setup_module(module):
 
 
 def teardown_module(module):
-    for path in TEST_FILES:
+    for path in TEST_FILES:  # type: ignore[union-attr]
         try:
             os.remove(path)
         except os.error:
@@ -95,6 +95,7 @@ def _check_pandas_roundtrip(df, expected=None, path=None,
     if version is None:
         version = 2
 
+    assert TEST_FILES is not None
     TEST_FILES.append(path)
     write_feather(df, path, compression=compression,
                   compression_level=compression_level, version=version)
@@ -114,6 +115,7 @@ def _check_arrow_roundtrip(table, path=None, compression=None):
     if path is None:
         path = random_path()
 
+    assert TEST_FILES is not None
     TEST_FILES.append(path)
     write_feather(table, path, compression=compression)
     if not os.path.exists(path):
@@ -126,10 +128,12 @@ def _check_arrow_roundtrip(table, path=None, compression=None):
 def _assert_error_on_write(df, exc, path=None, version=2):
     # check that we are raising the exception
     # on writing
+    assert version in (1, 2)
 
     if path is None:
         path = random_path()
 
+    assert TEST_FILES is not None
     TEST_FILES.append(path)
 
     def f():
@@ -149,6 +153,7 @@ def test_dataset(version):
     }
     table = pa.table(data)
 
+    assert TEST_FILES is not None
     TEST_FILES.extend(paths)
     for index, path in enumerate(paths):
         rows = (
@@ -156,7 +161,8 @@ def test_dataset(version):
             (index + 1) * (num_values[0] // num_files),
         )
 
-        write_feather(table[rows[0]: rows[1]], path, version=version)
+        write_feather(table[rows[0]: rows[1]], path,
+                      version=version)  # type: ignore[arg-type]
 
     data = FeatherDataset(paths).read_table()
     assert data.equals(table)
@@ -181,6 +187,7 @@ def test_read_table(version):
     num_values = (100, 100)
     path = random_path()
 
+    assert TEST_FILES is not None
     TEST_FILES.append(path)
 
     values = np.random.randint(0, 100, size=num_values)
@@ -206,6 +213,7 @@ def test_use_threads(version):
     num_values = (10, 10)
     path = random_path()
 
+    assert TEST_FILES is not None
     TEST_FILES.append(path)
 
     values = np.random.randint(0, 10, size=num_values)
@@ -231,6 +239,7 @@ def test_float_nulls(version):
     num_values = 100
 
     path = random_path()
+    assert TEST_FILES is not None
     TEST_FILES.append(path)
 
     null_mask = np.random.randint(0, 10, size=num_values) < 3
@@ -292,6 +301,7 @@ def test_platform_numpy_integers(version):
 def test_integer_with_nulls(version):
     # pandas requires upcast to float dtype
     path = random_path()
+    assert TEST_FILES is not None
     TEST_FILES.append(path)
 
     int_dtypes = ['i1', 'i2', 'i4', 'i8', 'u1', 'u2', 'u4', 'u8']
@@ -330,6 +340,7 @@ def test_boolean_no_nulls(version):
 def test_boolean_nulls(version):
     # pandas requires upcast to object dtype
     path = random_path()
+    assert TEST_FILES is not None
     TEST_FILES.append(path)
 
     num_values = 100
@@ -348,6 +359,7 @@ def test_boolean_nulls(version):
 def test_buffer_bounds_error(version):
     # ARROW-1676
     path = random_path()
+    assert TEST_FILES is not None
     TEST_FILES.append(path)
 
     for i in range(16, 256):
@@ -360,6 +372,7 @@ def test_buffer_bounds_error(version):
 
 @pytest.mark.numpy
 def test_boolean_object_nulls(version):
+    assert np is not None
     repeats = 100
     table = pa.Table.from_arrays(
         [np.array([False, None, True] * repeats, dtype=object)],
@@ -426,7 +439,8 @@ def test_empty_strings(version):
 @pytest.mark.pandas
 def test_all_none(version):
     df = pd.DataFrame({'all_none': [None] * 10})
-    if version == 1 and pa.pandas_compat._pandas_api.uses_string_dtype():
+    if (version == 1 and pa.pandas_compat  # type: ignore[attr-defined]
+            ._pandas_api.uses_string_dtype()):
         expected = df.astype("str")
     else:
         expected = df
@@ -552,6 +566,7 @@ def test_read_columns(version):
 @pytest.mark.numpy
 def test_overwritten_file(version):
     path = random_path()
+    assert TEST_FILES is not None
     TEST_FILES.append(path)
 
     num_values = 100
@@ -585,12 +600,12 @@ def test_filelike_objects(version):
 @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
 @pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning")
 def test_sparse_dataframe(version):
-    if not pa.pandas_compat._pandas_api.has_sparse:
+    if not pa.pandas_compat._pandas_api.has_sparse:  # type: ignore[attr-defined]
         pytest.skip("version of pandas does not support SparseDataFrame")
     # GH #221
     data = {'A': [0, 1, 2],
             'B': [1, 0, 1]}
-    df = pd.DataFrame(data).to_sparse(fill_value=1)
+    df = pd.DataFrame(data).to_sparse(fill_value=1)  # type: ignore[attr-defined]
     expected = df.to_dense()
     _check_pandas_roundtrip(df, expected, version=version)
 
@@ -692,8 +707,9 @@ def test_v2_lz4_default_compression():
     if not pa.Codec.is_available('lz4_frame'):
         pytest.skip("LZ4 compression support is not built in C++")
 
+    assert np is not None
     # some highly compressible data
-    t = pa.table([np.repeat(0, 100000)], names=['f0'])
+    t = pa.table([np.repeat(0, 100000)], names=['f0'])  # type: ignore[arg-type]
 
     buf = io.BytesIO()
     write_feather(t, buf)
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index a6d3546e57c6..3837b553b8be 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -24,16 +24,17 @@
 import math
 import os
 import pathlib
-import pytest
+import pytest  # type: ignore[import-not-found]
 import random
 import sys
 import tempfile
+from typing import cast
 import weakref
 
 try:
     import numpy as np
 except ImportError:
-    np = None
+    pass
 
 from pyarrow.util import guid
 from pyarrow import Codec
@@ -44,7 +45,7 @@ def check_large_seeks(file_factory, expected_error=None):
     if sys.platform in ('win32', 'darwin', 'emscripten'):
         pytest.skip("need sparse file support")
     try:
-        filename = tempfile.mktemp(prefix='test_io')
+        filename = tempfile.mkstemp(prefix='test_io')[1]
         with open(filename, 'wb') as f:
             f.truncate(2 ** 32 + 10)
             f.seek(2 ** 32 + 5)
@@ -234,7 +235,7 @@ def read_buffer(self, nbytes):
             return memoryview(dst_buf)[:nbytes]
 
     duck_reader = DuckReader()
-    with pa.PythonFile(duck_reader, mode='r') as f:
+    with pa.PythonFile(duck_reader, mode='r') as f:  # type: ignore[arg-type]
         buf = f.read_buffer(length)
         assert len(buf) == length
         assert memoryview(buf).tobytes() == dst_buf[:length]
@@ -474,7 +475,7 @@ def test_buffer_to_numpy():
     byte_array = bytearray(20)
     byte_array[0] = 42
     buf = pa.py_buffer(byte_array)
-    array = np.frombuffer(buf, dtype="uint8")
+    array = np.frombuffer(buf, dtype="uint8")  # type: ignore[arg-type]
     assert array[0] == byte_array[0]
     byte_array[0] += 1
     assert array[0] == byte_array[0]
@@ -557,7 +558,7 @@ def test_buffer_eq_bytes():
     assert buf != b'some dat1'
 
     with pytest.raises(TypeError):
-        buf == 'some data'
+        _ = buf == 'some data'
 
 
 def test_buffer_getitem():
@@ -598,22 +599,22 @@ def test_buffer_slicing():
 
     with pytest.raises(IndexError):
         buf.slice(len(buf) + 1)
-    assert buf[11:].to_pybytes() == b""
+    assert cast(pa.Buffer, buf[11:]).to_pybytes() == b""
 
     # Slice stop exceeds buffer length
     with pytest.raises(IndexError):
         buf.slice(1, len(buf))
-    assert buf[1:11].to_pybytes() == buf.to_pybytes()[1:]
+    assert cast(pa.Buffer, buf[1:11]).to_pybytes() == buf.to_pybytes()[1:]
 
     # Negative length
     with pytest.raises(IndexError):
         buf.slice(1, -1)
 
     # Test slice notation
-    assert buf[2:].equals(buf.slice(2))
-    assert buf[2:5].equals(buf.slice(2, 3))
-    assert buf[-5:].equals(buf.slice(len(buf) - 5))
-    assert buf[-5:-2].equals(buf.slice(len(buf) - 5, 3))
+    assert cast(pa.Buffer, buf[2:]).equals(buf.slice(2))
+    assert cast(pa.Buffer, buf[2:5]).equals(buf.slice(2, 3))
+    assert cast(pa.Buffer, buf[-5:]).equals(buf.slice(len(buf) - 5))
+    assert cast(pa.Buffer, buf[-5:-2]).equals(buf.slice(len(buf) - 5, 3))
 
     with pytest.raises(IndexError):
         buf[::-1]
@@ -623,7 +624,8 @@ def test_buffer_slicing():
     n = len(buf)
     for start in range(-n * 2, n * 2):
         for stop in range(-n * 2, n * 2):
-            assert buf[start:stop].to_pybytes() == buf.to_pybytes()[start:stop]
+            assert cast(pa.Buffer, buf[start:stop]).to_pybytes(
+            ) == buf.to_pybytes()[start:stop]
 
 
 def test_buffer_hashing():
@@ -640,7 +642,7 @@ def test_buffer_protocol_respects_immutability():
     # immutable
     a = b'12345'
     arrow_ref = pa.py_buffer(a)
-    numpy_ref = np.frombuffer(arrow_ref, dtype=np.uint8)
+    numpy_ref = np.frombuffer(arrow_ref, dtype=np.uint8)  # type: ignore[arg-type]
     assert not numpy_ref.flags.writeable
 
 
@@ -652,7 +654,8 @@ def test_foreign_buffer():
     buf = pa.foreign_buffer(addr, size, obj)
     wr = weakref.ref(obj)
     del obj
-    assert np.frombuffer(buf, dtype=np.int32).tolist() == [1, 2]
+    assert (np.frombuffer(buf, dtype=np.int32).tolist()  # type: ignore[arg-type]
+            == [1, 2])
     assert wr() is not None
     del buf
     assert wr() is None
@@ -688,6 +691,7 @@ def test_non_cpu_buffer(pickle_module):
     cuda_buf = ctx.buffer_from_data(data)
     arr = pa.FixedSizeBinaryArray.from_buffers(pa.binary(7), 1, [None, cuda_buf])
     buf_on_gpu = arr.buffers()[1]
+    assert buf_on_gpu is not None
 
     assert buf_on_gpu.size == cuda_buf.size
     assert buf_on_gpu.address == cuda_buf.address
@@ -708,7 +712,7 @@ def test_non_cpu_buffer(pickle_module):
     assert cuda_sliced.to_pybytes() == b'st'
 
     # Sliced buffers with same address
-    assert buf_on_gpu_sliced.equals(cuda_buf[2:4])
+    assert cast(pa.Buffer, buf_on_gpu_sliced).equals(cuda_buf[2:4])
 
     # Buffers on different devices
     msg_device = "Device on which the data resides differs between buffers"
@@ -720,13 +724,14 @@ def test_non_cpu_buffer(pickle_module):
     arr_short = np.array([b'sting'])
     cuda_buf_short = ctx.buffer_from_data(arr_short)
     with pytest.raises(NotImplementedError, match=msg):
-        buf_on_gpu_sliced.equals(cuda_buf_short)
+        cast(pa.Buffer, buf_on_gpu_sliced).equals(cuda_buf_short)
     arr_short = pa.FixedSizeBinaryArray.from_buffers(
         pa.binary(5), 1, [None, cuda_buf_short]
     )
     buf_on_gpu_short = arr_short.buffers()[1]
+    assert buf_on_gpu_short is not None
     with pytest.raises(NotImplementedError, match=msg):
-        buf_on_gpu_sliced.equals(buf_on_gpu_short)
+        cast(pa.Buffer, buf_on_gpu_sliced).equals(buf_on_gpu_short)
 
     with pytest.raises(NotImplementedError, match=msg):
         buf_on_gpu.hex()
@@ -811,8 +816,9 @@ def test_cache_options_pickling(pickle_module):
 
 @pytest.mark.numpy
 @pytest.mark.parametrize("compression", [
-    pytest.param(
-        "bz2", marks=pytest.mark.xfail(raises=pa.lib.ArrowNotImplementedError)
+    pytest.param("bz2", marks=pytest.mark.xfail(
+        raises=pa.lib.ArrowNotImplementedError  # type: ignore[attr-defined]
+    )
     ),
     "brotli",
     "gzip",
@@ -843,6 +849,7 @@ def test_compress_decompress(compression):
 
     assert isinstance(decompressed_bytes, bytes)
 
+    assert isinstance(decompressed_buf, pa.Buffer)
     assert decompressed_buf.equals(test_buf)
     assert decompressed_bytes == test_data
 
@@ -852,8 +859,9 @@ def test_compress_decompress(compression):
 
 @pytest.mark.numpy
 @pytest.mark.parametrize("compression", [
-    pytest.param(
-        "bz2", marks=pytest.mark.xfail(raises=pa.lib.ArrowNotImplementedError)
+    pytest.param("bz2", marks=pytest.mark.xfail(
+        raises=pa.lib.ArrowNotImplementedError  # type: ignore[attr-defined]
+    )
     ),
     "brotli",
     "gzip",
@@ -910,6 +918,7 @@ def test_compression_level(compression):
 
         assert isinstance(decompressed_bytes, bytes)
 
+        assert isinstance(decompressed_buf, pa.Buffer)
         assert decompressed_buf.equals(test_buf)
         assert decompressed_bytes == test_data
 
@@ -951,12 +960,12 @@ def test_buffer_memoryview_is_immutable():
     assert result.readonly
 
     with pytest.raises(TypeError) as exc:
-        result[0] = b'h'
+        result[0] = b'h'  # type: ignore[index]
         assert 'cannot modify read-only' in str(exc.value)
 
     b = bytes(buf)
     with pytest.raises(TypeError) as exc:
-        b[0] = b'h'
+        b[0] = b'h'  # type: ignore[index]
         assert 'cannot modify read-only' in str(exc.value)
 
 
@@ -1748,9 +1757,9 @@ def test_unknown_compression_raises():
     "gzip",
     "lz4",
     "zstd",
-    pytest.param(
-        "snappy",
-        marks=pytest.mark.xfail(raises=pa.lib.ArrowNotImplementedError)
+    pytest.param("snappy", marks=pytest.mark.xfail(
+        raises=pa.lib.ArrowNotImplementedError  # type: ignore[attr-defined]
+    )
     )
 ])
 def test_compressed_roundtrip(compression):
@@ -2021,7 +2030,7 @@ def test_input_stream_native_file():
 def test_input_stream_errors(tmpdir):
     buf = memoryview(b"")
     with pytest.raises(ValueError):
-        pa.input_stream(buf, compression="foo")
+        pa.input_stream(buf, compression="foo")  # type: ignore[reportArgumentType]
 
     for arg in [bytearray(), StringIO()]:
         with pytest.raises(TypeError):
@@ -2198,7 +2207,7 @@ def check_data(data, **kwargs):
 def test_output_stream_errors(tmpdir):
     buf = memoryview(bytearray())
     with pytest.raises(ValueError):
-        pa.output_stream(buf, compression="foo")
+        pa.output_stream(buf, compression="foo")  # type: ignore[reportArgumentType]
 
     for arg in [bytearray(), StringIO()]:
         with pytest.raises(TypeError):
diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py
index 6813ed777234..93b9e7f1aa02 100644
--- a/python/pyarrow/tests/test_ipc.py
+++ b/python/pyarrow/tests/test_ipc.py
@@ -24,23 +24,27 @@
 import socket
 import threading
 import weakref
+from typing import TYPE_CHECKING, cast
 
-try:
+if TYPE_CHECKING:
     import numpy as np
-except ImportError:
-    np = None
+    import pandas as pd
+    from pandas.testing import assert_frame_equal
+else:
+    try:
+        import numpy as np
+    except ImportError:
+        pass
+    try:
+        from pandas.testing import assert_frame_equal
+        import pandas as pd
+    except ImportError:
+        pass
 
 import pyarrow as pa
 from pyarrow.tests.util import changed_environ, invoke_script
 
 
-try:
-    from pandas.testing import assert_frame_equal
-    import pandas as pd
-except ImportError:
-    pass
-
-
 class IpcFixture:
     write_stats = None
 
@@ -48,6 +52,9 @@ def __init__(self, sink_factory=lambda: io.BytesIO()):
         self._sink_factory = sink_factory
         self.sink = self.get_sink()
 
+    def _get_writer(self, sink, schema):
+        ...  # Implemented in subclasses
+
     def get_sink(self):
         return self._sink_factory()
 
@@ -59,6 +66,7 @@ def write_batches(self, num_batches=5, as_table=False):
         schema = pa.schema([('one', pa.float64()), ('two', pa.utf8())])
 
         writer = self._get_writer(self.sink, schema)
+        assert writer is not None
 
         batches = []
         for i in range(num_batches):
@@ -385,7 +393,8 @@ def test_stream_write_table_batches(stream_fixture):
         'one': np.random.randn(20),
     })
 
-    b1 = pa.RecordBatch.from_pandas(df[:10], preserve_index=False)
+    b1 = pa.RecordBatch.from_pandas(
+        df[:10], preserve_index=False)  # type: ignore[arg-type]
     b2 = pa.RecordBatch.from_pandas(df, preserve_index=False)
 
     table = pa.Table.from_batches([b1, b2, b1])
@@ -941,7 +950,7 @@ def test_ipc_file_stream_has_eos():
     buffer = sink.getvalue()
 
     # skip the file magic
-    reader = pa.ipc.open_stream(buffer[8:])
+    reader = pa.ipc.open_stream(cast(pa.Buffer, buffer[8:]))
 
     # will fail if encounters footer data instead of eos
     rdf = reader.read_pandas()
@@ -980,7 +989,8 @@ def test_batches_with_custom_metadata_roundtrip(ipc_type):
 
     with file_factory(sink, batch.schema) as writer:
         for i in range(batch_count):
-            writer.write_batch(batch, custom_metadata={"batch_id": str(i)})
+            writer.write_batch(batch, custom_metadata={  # type: ignore[arg-type]
+                "batch_id": str(i)})
         # write a batch without custom metadata
         writer.write_batch(batch)
 
diff --git a/python/pyarrow/tests/test_json.py b/python/pyarrow/tests/test_json.py
index c3f9fe333bd0..c0b6b8ecd0d5 100644
--- a/python/pyarrow/tests/test_json.py
+++ b/python/pyarrow/tests/test_json.py
@@ -23,11 +23,16 @@
 import json
 import string
 import unittest
+from typing import TYPE_CHECKING
 
-try:
+if TYPE_CHECKING:
     import numpy as np
-except ImportError:
-    np = None
+else:
+    try:
+        import numpy as np
+    except ImportError:
+        pass
+
 import pytest
 
 import pyarrow as pa
@@ -317,6 +322,9 @@ def test_stress_block_sizes(self):
 
 class BaseTestJSONRead(BaseTestJSON):
 
+    def read_json(self, *args, **kwargs) -> pa.Table:  # type: ignore[empty-body]
+        ...  # Implemented in subclasses
+
     def read_bytes(self, b, **kwargs):
         return self.read_json(pa.py_buffer(b), **kwargs)
 
@@ -352,6 +360,8 @@ def test_reconcile_across_blocks(self):
 
 
 class BaseTestStreamingJSONRead(BaseTestJSON):
+    use_threads: bool = False  # Set by subclasses
+
     def open_json(self, json, *args, **kwargs):
         """
         Reads the JSON file into memory using pyarrow's open_json
diff --git a/python/pyarrow/tests/test_orc.py b/python/pyarrow/tests/test_orc.py
index 27154a6f34f3..d0e61d758cb2 100644
--- a/python/pyarrow/tests/test_orc.py
+++ b/python/pyarrow/tests/test_orc.py
@@ -77,7 +77,7 @@ def fix_example_values(actual_cols, expected_cols):
                 if not pd.isnull(v):
                     exp = d.as_tuple().exponent
                     factor = 10 ** -exp
-                    converted_decimals[i] = (
+                    converted_decimals[i] = (  # type: ignore[call-overload,assignment]
                         decimal.Decimal(round(v * factor)).scaleb(exp))
             expected = pd.Series(converted_decimals)
 
@@ -314,7 +314,7 @@ def test_buffer_readwrite():
     # deprecated keyword order
     buffer_output_stream = pa.BufferOutputStream()
     with pytest.warns(FutureWarning):
-        orc.write_table(buffer_output_stream, table)
+        orc.write_table(buffer_output_stream, table)  # type: ignore[arg-type]
     buffer_reader = pa.BufferReader(buffer_output_stream.getvalue())
     orc_file = orc.ORCFile(buffer_reader)
     output_table = orc_file.read()
@@ -356,8 +356,8 @@ def test_buffer_readwrite_with_writeoptions():
     buffer_output_stream = pa.BufferOutputStream()
     with pytest.warns(FutureWarning):
         orc.write_table(
-            buffer_output_stream,
-            table,
+            buffer_output_stream,  # type: ignore[reportArgumentType]
+            table,  # type: ignore[reportArgumentType]
             compression='uncompressed',
             file_version='0.11',
             row_index_stride=20000,
@@ -444,20 +444,20 @@ def test_buffer_readwrite_with_bad_writeoptions():
         orc.write_table(
             table,
             buffer_output_stream,
-            compression=0,
+            compression=0,  # type: ignore[reportArgumentType]
         )
 
     with pytest.raises(ValueError):
         orc.write_table(
             table,
             buffer_output_stream,
-            compression='none',
+            compression='none',  # type: ignore[reportArgumentType]
         )
     with pytest.raises(ValueError):
         orc.write_table(
             table,
             buffer_output_stream,
-            compression='zlid',
+            compression='zlid',  # type: ignore[reportArgumentType]
         )
 
     # compression_block_size must be a positive integer
@@ -487,20 +487,20 @@ def test_buffer_readwrite_with_bad_writeoptions():
         orc.write_table(
             table,
             buffer_output_stream,
-            compression_strategy=0,
+            compression_strategy=0,  # type: ignore[reportArgumentType]
         )
 
     with pytest.raises(ValueError):
         orc.write_table(
             table,
             buffer_output_stream,
-            compression_strategy='no',
+            compression_strategy='no',  # type: ignore[reportArgumentType]
         )
     with pytest.raises(ValueError):
         orc.write_table(
             table,
             buffer_output_stream,
-            compression_strategy='large',
+            compression_strategy='large',  # type: ignore[reportArgumentType]
         )
 
     # row_index_stride must be a positive integer