From 5b8e3a8d22ab3fb2779f6fe7211a5e340e9a4afa Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Thu, 17 Apr 2025 22:49:58 -0400 Subject: [PATCH] fix: don't traverse into excluded directories Signed-off-by: Henry Schreiner --- .../build/_file_processor.py | 117 +++++++++++++----- 1 file changed, 89 insertions(+), 28 deletions(-) diff --git a/src/scikit_build_core/build/_file_processor.py b/src/scikit_build_core/build/_file_processor.py index f084d3a4..e62df529 100644 --- a/src/scikit_build_core/build/_file_processor.py +++ b/src/scikit_build_core/build/_file_processor.py @@ -7,7 +7,8 @@ import pathspec -from scikit_build_core.format import pyproject_format +from .._logging import logger +from ..format import pyproject_format if TYPE_CHECKING: from collections.abc import Generator, Sequence @@ -65,33 +66,93 @@ def each_unignored_file( include_spec = pathspec.GitIgnoreSpec.from_lines(include) - for dirstr, _, filenames in os.walk(str(starting_path), followlinks=True): + for dirstr, dirs, filenames in os.walk(str(starting_path), followlinks=True): dirpath = Path(dirstr) - all_paths = (dirpath / fn for fn in filenames) - for p in all_paths: - # Always include something included - if include_spec.match_file(p): - yield p - continue - - # Always exclude something excluded - if user_exclude_spec.match_file(p): - continue - - # Ignore from global ignore - if global_exclude_spec.match_file(p): - continue - - # Ignore built-in patterns - if builtin_exclude_spec.match_file(p): - continue - - # Check relative ignores (Python 3.9's is_relative_to workaround) - if any( - nex.match_file(p.relative_to(np)) - for np, nex in nested_excludes.items() - if dirpath == np or np in dirpath.parents + for dname in dirs: + if not match_path( + dirpath, + dirpath / dname, + include_spec, + global_exclude_spec, + builtin_exclude_spec, + user_exclude_spec, + nested_excludes, + is_path=True, ): - continue + dirs.remove(dname) + + for fn in filenames: + path = dirpath / fn + if match_path( + dirpath, + path, + include_spec, + global_exclude_spec, + builtin_exclude_spec, + user_exclude_spec, + nested_excludes, + is_path=False, + ): + yield path + + +def match_path( + dirpath: Path, + p: Path, + include_spec: pathspec.GitIgnoreSpec, + global_exclude_spec: pathspec.GitIgnoreSpec, + builtin_exclude_spec: pathspec.GitIgnoreSpec, + user_exclude_spec: pathspec.GitIgnoreSpec, + nested_excludes: dict[Path, pathspec.GitIgnoreSpec], + *, + is_path: bool, +) -> bool: + ptype = "directory" if is_path else "file" + + # Always include something included + if include_spec.match_file(p): + logger.info("Including {} {} because it is explicitly included.", ptype, p) + return True + + # Always exclude something excluded + if user_exclude_spec.match_file(p): + logger.info( + "Excluding {} {} because it is explicitly excluded by the user.", ptype, p + ) + return False + + # Ignore from global ignore + if global_exclude_spec.match_file(p): + logger.info( + "Excluding {} {} because it is explicitly excluded by the global ignore.", + ptype, + p, + ) + return False + + # Ignore built-in patterns + if builtin_exclude_spec.match_file(p): + logger.info( + "Excluding {} {} because it is explicitly excluded by the built-in ignore.", + ptype, + p, + ) + return False + + # Check relative ignores (Python 3.9's is_relative_to workaround) + if any( + nex.match_file(p.relative_to(np)) + for np, nex in nested_excludes.items() + if dirpath == np or np in dirpath.parents + ): + logger.info( + "Excluding {} {} because it is explicitly included by nested ignore.", + ptype, + p, + ) + return False - yield p + logger.info( + "Including {} {} because it exists (and isn't matched any other way).", ptype, p + ) + return True