Skip to content

fix: don't traverse into excluded directories #1048

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 89 additions & 28 deletions src/scikit_build_core/build/_file_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@

import pathspec

from scikit_build_core.format import pyproject_format
from .._logging import logger
from ..format import pyproject_format

if TYPE_CHECKING:
from collections.abc import Generator, Sequence
Expand Down Expand Up @@ -65,33 +66,93 @@ def each_unignored_file(

include_spec = pathspec.GitIgnoreSpec.from_lines(include)

for dirstr, _, filenames in os.walk(str(starting_path), followlinks=True):
for dirstr, dirs, filenames in os.walk(str(starting_path), followlinks=True):
dirpath = Path(dirstr)
all_paths = (dirpath / fn for fn in filenames)
for p in all_paths:
# Always include something included
if include_spec.match_file(p):
yield p
continue

# Always exclude something excluded
if user_exclude_spec.match_file(p):
continue

# Ignore from global ignore
if global_exclude_spec.match_file(p):
continue

# Ignore built-in patterns
if builtin_exclude_spec.match_file(p):
continue

# Check relative ignores (Python 3.9's is_relative_to workaround)
if any(
nex.match_file(p.relative_to(np))
for np, nex in nested_excludes.items()
if dirpath == np or np in dirpath.parents
for dname in dirs:
if not match_path(
dirpath,
dirpath / dname,
include_spec,
global_exclude_spec,
builtin_exclude_spec,
user_exclude_spec,
nested_excludes,
is_path=True,
):
continue
dirs.remove(dname)

for fn in filenames:
path = dirpath / fn
if match_path(
dirpath,
path,
include_spec,
global_exclude_spec,
builtin_exclude_spec,
user_exclude_spec,
nested_excludes,
is_path=False,
):
yield path


def match_path(
    dirpath: Path,
    p: Path,
    include_spec: pathspec.GitIgnoreSpec,
    global_exclude_spec: pathspec.GitIgnoreSpec,
    builtin_exclude_spec: pathspec.GitIgnoreSpec,
    user_exclude_spec: pathspec.GitIgnoreSpec,
    nested_excludes: dict[Path, pathspec.GitIgnoreSpec],
    *,
    is_path: bool,
) -> bool:
    """Decide whether ``p`` should be kept (``True``) or ignored (``False``).

    The specs are consulted in a fixed order and the first match wins:

    1. ``include_spec`` - a match always keeps the path.
    2. ``user_exclude_spec`` - a match drops the path.
    3. ``global_exclude_spec`` - a match drops the path.
    4. ``builtin_exclude_spec`` - a match drops the path.
    5. ``nested_excludes`` - each key is a directory and its value the spec
       attached to it; the spec is applied to ``p`` relative to that
       directory, but only when ``dirpath`` is that directory or lies
       underneath it. Any match drops the path.

    A path matched by none of the above is kept. Every decision is logged
    at info level together with the reason.

    Args:
        dirpath: Directory currently being walked (the parent of ``p``).
        p: The file or directory being evaluated.
        include_spec: Patterns that force inclusion.
        global_exclude_spec: Exclusion patterns from the global ignore.
        builtin_exclude_spec: Built-in exclusion patterns.
        user_exclude_spec: Exclusion patterns supplied by the user.
        nested_excludes: Mapping of directory -> exclusion spec that is
            evaluated relative to that directory.
        is_path: ``True`` when ``p`` is a directory, ``False`` when it is a
            file. Only affects the wording of the log messages.

    Returns:
        ``True`` if ``p`` should be included, ``False`` if it is excluded.
    """
    ptype = "directory" if is_path else "file"

    # Always include something included
    if include_spec.match_file(p):
        logger.info("Including {} {} because it is explicitly included.", ptype, p)
        return True

    # Always exclude something excluded
    if user_exclude_spec.match_file(p):
        logger.info(
            "Excluding {} {} because it is explicitly excluded by the user.", ptype, p
        )
        return False

    # Ignore from global ignore
    if global_exclude_spec.match_file(p):
        logger.info(
            "Excluding {} {} because it is explicitly excluded by the global ignore.",
            ptype,
            p,
        )
        return False

    # Ignore built-in patterns
    if builtin_exclude_spec.match_file(p):
        logger.info(
            "Excluding {} {} because it is explicitly excluded by the built-in ignore.",
            ptype,
            p,
        )
        return False

    # Check relative ignores (Python 3.9's is_relative_to workaround): the
    # ``if`` clause guarantees ``p`` is below ``np`` before ``relative_to`` runs,
    # so it cannot raise ValueError.
    if any(
        nex.match_file(p.relative_to(np))
        for np, nex in nested_excludes.items()
        if dirpath == np or np in dirpath.parents
    ):
        logger.info(
            "Excluding {} {} because it is explicitly excluded by nested ignore.",
            ptype,
            p,
        )
        return False

    # No ``yield`` here: this must stay a plain function returning a bool,
    # otherwise callers would receive an always-truthy generator object.
    logger.info(
        "Including {} {} because it exists (and isn't matched any other way).", ptype, p
    )
    return True
Loading