Skip to content

Commit fe17e19

Browse files
authored
Merge pull request #318 from lincc-frameworks/speedup-cloud-read
Speedup cloud reads
2 parents 82cb2e8 + 5e24a36 commit fe17e19

File tree

1 file changed

+17
-4
lines changed
  • src/nested_pandas/nestedframe

1 file changed

+17
-4
lines changed

src/nested_pandas/nestedframe/io.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
import pandas as pd
77
import pyarrow as pa
8+
import pyarrow.fs
89
import pyarrow.parquet as pq
910
from upath import UPath
1011

@@ -13,6 +14,9 @@
1314
from ..series.utils import table_to_struct_array
1415
from .core import NestedFrame
1516

17+
# Use a smaller block size for fsspec filesystems; it usually helps with parquet reads
18+
FSSPEC_BLOCK_SIZE = 32 * 1024
19+
1620

1721
def read_parquet(
1822
data: str | UPath | bytes,
@@ -96,10 +100,19 @@ def read_parquet(
96100
# If `data` is a file-like object or a sequence, pass it directly to pyarrow
97101
table = pq.read_table(data, columns=columns, **kwargs)
98102
else:
99-
# Otherwise, treat `data` as a file path and use UPath
100-
path = UPath(data)
101-
filesystem = kwargs.pop("filesystem", path.fs)
102-
table = pq.read_table(path.path, columns=columns, filesystem=filesystem, **kwargs)
103+
# Try creating pyarrow-native filesystem
104+
try:
105+
fs, path = pa.fs.FileSystem.from_uri(data)
106+
except (TypeError, pa.ArrowInvalid):
107+
# Otherwise, treat `data` as a URI for an fsspec-supported filesystem and use UPath
108+
upath = UPath(data)
109+
# Use smaller block size for better performance
110+
if upath.protocol in ("http", "https"):
111+
upath = UPath(upath, block_size=FSSPEC_BLOCK_SIZE)
112+
path = upath.path
113+
fs = upath.fs
114+
filesystem = kwargs.pop("filesystem", fs)
115+
table = pq.read_table(path, columns=columns, filesystem=filesystem, **kwargs)
103116

104117
# Resolve partial loading of nested structures
105118
# Using pyarrow to avoid naming conflicts from partial loading ("flux" vs "lc.flux")

0 commit comments

Comments
 (0)