Skip to content

Commit a99320b

Browse files
fix(fsspec): handle zero-byte files in __len__ (#3353)
## Summary

Fix `FsspecInputFile.__len__` and `FsspecOutputFile.__len__` so zero-byte files return `0` instead of being treated as missing metadata. Both methods previously used truthiness checks on `object_info.get(...)`, which caused valid sizes like `0` to fall through to the runtime error path.

## Changes

- check for `Size` key presence explicitly
- check for `size` key presence explicitly
- add a regression test covering zero-byte lengths for both metadata key variants

## Verification

- `python -m pytest tests/io/test_fsspec.py -k zero_length_of_file -q`
1 parent 4b3ccbb commit a99320b

2 files changed

Lines changed: 22 additions & 8 deletions

File tree

pyiceberg/io/fsspec.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -335,10 +335,10 @@ def __init__(self, location: str, fs: AbstractFileSystem):
335335
def __len__(self) -> int:
336336
"""Return the total length of the file, in bytes."""
337337
object_info = self._fs.info(self.location)
338-
if size := object_info.get("Size"):
339-
return size
340-
elif size := object_info.get("size"):
341-
return size
338+
if "Size" in object_info:
339+
return object_info["Size"]
340+
elif "size" in object_info:
341+
return object_info["size"]
342342
raise RuntimeError(f"Cannot retrieve object info: {self.location}")
343343

344344
def exists(self) -> bool:
@@ -379,10 +379,10 @@ def __init__(self, location: str, fs: AbstractFileSystem):
379379
def __len__(self) -> int:
380380
"""Return the total length of the file, in bytes."""
381381
object_info = self._fs.info(self.location)
382-
if size := object_info.get("Size"):
383-
return size
384-
elif size := object_info.get("size"):
385-
return size
382+
if "Size" in object_info:
383+
return object_info["Size"]
384+
elif "size" in object_info:
385+
return object_info["size"]
386386
raise RuntimeError(f"Cannot retrieve object info: {self.location}")
387387

388388
def exists(self) -> bool:

tests/io/test_fsspec.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,20 @@ def test_fsspec_getting_length_of_file(fsspec_fileio: FsspecFileIO) -> None:
146146
fsspec_fileio.delete(output_file)
147147

148148

149+
@pytest.mark.parametrize("size_key", ["Size", "size"])
150+
def test_fsspec_getting_zero_length_of_file(size_key: str) -> None:
151+
"""Test getting zero-byte lengths from object metadata."""
152+
location = "s3://warehouse/empty-file"
153+
fs = mock.Mock(spec=AbstractFileSystem)
154+
fs.info.return_value = {size_key: 0}
155+
156+
output_file = fsspec.FsspecOutputFile(location=location, fs=fs)
157+
assert len(output_file) == 0
158+
159+
input_file = fsspec.FsspecInputFile(location=location, fs=fs)
160+
assert len(input_file) == 0
161+
162+
149163
@pytest.mark.s3
150164
def test_fsspec_file_tell(fsspec_fileio: FsspecFileIO) -> None:
151165
"""Test finding cursor position for an fsspec file-io file"""

0 commit comments

Comments
 (0)