From 568115656d13fdeb8743499911925326a2c13cc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Salom=C3=A9=20Voltz?= Date: Mon, 20 Oct 2025 18:12:07 +0200 Subject: [PATCH] fix(secret): skip non-seekable files during scanning --- ..._oserror_errno_22_invalid_argument_when.md | 42 +++++++++++++++++++ ggshield/core/scan/__init__.py | 3 +- ggshield/core/scan/scannable.py | 15 +++++-- ggshield/verticals/secret/secret_scanner.py | 4 ++ tests/unit/core/scan/test_scannable.py | 24 ++++++++++- 5 files changed, 83 insertions(+), 5 deletions(-) create mode 100644 changelog.d/20251020_174722_salome.voltz_scrt_5971_ggshield_runs_into_oserror_errno_22_invalid_argument_when.md diff --git a/changelog.d/20251020_174722_salome.voltz_scrt_5971_ggshield_runs_into_oserror_errno_22_invalid_argument_when.md b/changelog.d/20251020_174722_salome.voltz_scrt_5971_ggshield_runs_into_oserror_errno_22_invalid_argument_when.md new file mode 100644 index 0000000000..96fc686e23 --- /dev/null +++ b/changelog.d/20251020_174722_salome.voltz_scrt_5971_ggshield_runs_into_oserror_errno_22_invalid_argument_when.md @@ -0,0 +1,42 @@ + + + + + + + +### Fixed + +- Skip non-seekable files instead of crashing. + + diff --git a/ggshield/core/scan/__init__.py b/ggshield/core/scan/__init__.py index 71592b3ed1..b37b6197f6 100644 --- a/ggshield/core/scan/__init__.py +++ b/ggshield/core/scan/__init__.py @@ -2,7 +2,7 @@ from .file import File, create_files_from_paths from .scan_context import ScanContext from .scan_mode import ScanMode -from .scannable import DecodeError, Scannable, StringScannable +from .scannable import DecodeError, NonSeekableFileError, Scannable, StringScannable __all__ = [ @@ -10,6 +10,7 @@ "Commit", "DecodeError", "File", + "NonSeekableFileError", "ScanContext", "ScanMode", "Scannable", diff --git a/ggshield/core/scan/scannable.py b/ggshield/core/scan/scannable.py index 9f664025ef..4a0ab411ed 100644 --- a/ggshield/core/scan/scannable.py +++ b/ggshield/core/scan/scannable.py @@ -31,6 +31,12 @@ class DecodeError(Exception): pass +class NonSeekableFileError(Exception): + """Raised when a file cannot be seeked""" + + pass + + class Scannable(ABC): """Base class for content that can be scanned by GGShield""" @@ -143,8 +149,12 @@ def _is_file_longer_than( Raises DecodeError if the file cannot be decoded. """ # Get the byte size - assert fp.seekable() - byte_size = fp.seek(0, SEEK_END) + # Note: IOBase.seekable() returns True on some non-seekable files like /proc/self/mounts + try: + byte_size = fp.seek(0, SEEK_END) + fp.seek(0, SEEK_SET) + except OSError as exc: + raise NonSeekableFileError() from exc if byte_size > max_utf8_encoded_size * UTF8_TO_WORSE_OTHER_ENCODING_RATIO: # Even if the file used the worst encoding (UTF-32), encoding the content of @@ -153,7 +163,6 @@ def _is_file_longer_than( return True, None, None # Determine the encoding - fp.seek(0, SEEK_SET) charset_matches = charset_normalizer.from_fp(fp) charset_match = charset_matches.best() if charset_match is None: diff --git a/ggshield/verticals/secret/secret_scanner.py b/ggshield/verticals/secret/secret_scanner.py index e323eee8ca..dc97cde4a6 100644 --- a/ggshield/verticals/secret/secret_scanner.py +++ b/ggshield/verticals/secret/secret_scanner.py @@ -16,6 +16,7 @@ from ggshield.core.constants import MAX_WORKERS from ggshield.core.errors import handle_api_error from ggshield.core.scan import DecodeError, ScanContext, Scannable +from ggshield.core.scan.scannable import NonSeekableFileError from ggshield.core.scanner_ui.scanner_ui import ScannerUI from ggshield.core.text_utils import pluralize @@ -157,6 +158,9 @@ def _start_scans( except DecodeError: scanner_ui.on_skipped(scannable, "can't detect encoding") continue + except NonSeekableFileError: + scanner_ui.on_skipped(scannable, "file cannot be seeked") + continue if content: if ( diff --git a/tests/unit/core/scan/test_scannable.py b/tests/unit/core/scan/test_scannable.py index aea6bde43e..c1ada202bd 100644 --- a/tests/unit/core/scan/test_scannable.py +++ b/tests/unit/core/scan/test_scannable.py @@ -1,8 +1,10 @@ from pathlib import Path +from unittest.mock import patch import pytest -from ggshield.core.scan import StringScannable +from ggshield.core.scan import File, StringScannable +from ggshield.core.scan.scannable import NonSeekableFileError def test_string_scannable_path(): @@ -32,3 +34,23 @@ def test_string_scannable_is_longer_than(content, is_longer): """ scannable = StringScannable(content=content, url="u") assert scannable.is_longer_than(50) == is_longer + + +@patch("pathlib.Path.open") +def test_file_non_seekable(mock_open, tmp_path): + """ + GIVEN a File instance + AND the file reports as seekable but seeking operations fail + WHEN is_longer_than() is called on it + THEN it raises NonSeekableFileError + """ + mock_file = mock_open.return_value.__enter__.return_value + mock_file.seekable.return_value = True + mock_file.seek.side_effect = OSError(22, "Invalid argument") + + test_file = tmp_path / "test.txt" + test_file.write_text("test content") + file_obj = File(test_file) + + with pytest.raises(NonSeekableFileError): + file_obj.is_longer_than(1000)