diff --git a/python/codeql-extractor.yml b/python/codeql-extractor.yml index 97a9e1f2cf2f..2bd1a9c0aa76 100644 --- a/python/codeql-extractor.yml +++ b/python/codeql-extractor.yml @@ -44,3 +44,10 @@ options: Use this setting with caution, the Python extractor requires Python 3 to run. type: string pattern: "^(py|python|python3)$" + skip_hidden_directories: + title: Controls whether hidden directories are skipped during extraction. + description: > + By default, CodeQL will extract all Python files, including ones located in hidden directories. Setting this option to true causes hidden directories to be skipped instead. + Accepted values are true and false. + type: string + pattern: "^(true|false)$" diff --git a/python/extractor/cli-integration-test/hidden-files/query-default.expected b/python/extractor/cli-integration-test/hidden-files/query-default.expected new file mode 100644 index 000000000000..cc92af624b37 --- /dev/null +++ b/python/extractor/cli-integration-test/hidden-files/query-default.expected @@ -0,0 +1,5 @@ +| name | ++-------------------------------+ +| .hidden_file.py | +| foo.py | +| visible_file_in_hidden_dir.py | diff --git a/python/extractor/cli-integration-test/hidden-files/query-skipped.expected b/python/extractor/cli-integration-test/hidden-files/query-skipped.expected new file mode 100644 index 000000000000..688dbe00d570 --- /dev/null +++ b/python/extractor/cli-integration-test/hidden-files/query-skipped.expected @@ -0,0 +1,4 @@ +| name | ++-----------------+ +| .hidden_file.py | +| foo.py | diff --git a/python/extractor/cli-integration-test/hidden-files/query.ql b/python/extractor/cli-integration-test/hidden-files/query.ql new file mode 100644 index 000000000000..3b1b3c03849b --- /dev/null +++ b/python/extractor/cli-integration-test/hidden-files/query.ql @@ -0,0 +1,3 @@ +import python + +select any(File f).getShortName() as name order by name diff --git 
a/python/extractor/cli-integration-test/hidden-files/repo_dir/.hidden_dir/visible_file_in_hidden_dir.py b/python/extractor/cli-integration-test/hidden-files/repo_dir/.hidden_dir/visible_file_in_hidden_dir.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/extractor/cli-integration-test/hidden-files/repo_dir/.hidden_file.py b/python/extractor/cli-integration-test/hidden-files/repo_dir/.hidden_file.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/extractor/cli-integration-test/hidden-files/repo_dir/foo.py b/python/extractor/cli-integration-test/hidden-files/repo_dir/foo.py new file mode 100644 index 000000000000..517b47df53c2 --- /dev/null +++ b/python/extractor/cli-integration-test/hidden-files/repo_dir/foo.py @@ -0,0 +1 @@ +print(42) diff --git a/python/extractor/cli-integration-test/hidden-files/test.sh b/python/extractor/cli-integration-test/hidden-files/test.sh new file mode 100755 index 000000000000..77cb12664af6 --- /dev/null +++ b/python/extractor/cli-integration-test/hidden-files/test.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +set -Eeuo pipefail # see https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/ + +set -x + +CODEQL=${CODEQL:-codeql} + +SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +cd "$SCRIPTDIR" + +rm -rf db db-skipped + +# Test 1: Default behavior should be to extract files in hidden directories +$CODEQL database create db --language python --source-root repo_dir/ +$CODEQL query run --database db query.ql > query-default.actual +diff query-default.expected query-default.actual + +# Test 2: Setting the relevant extractor option to true skips files in hidden directories +$CODEQL database create db-skipped --language python --source-root repo_dir/ --extractor-option python.skip_hidden_directories=true +$CODEQL query run --database db-skipped query.ql > query-skipped.actual +diff query-skipped.expected query-skipped.actual + +rm -rf db db-skipped diff --git 
a/python/extractor/semmle/traverser.py b/python/extractor/semmle/traverser.py index ad8bd38ae735..0945d8ace4bf 100644 --- a/python/extractor/semmle/traverser.py +++ b/python/extractor/semmle/traverser.py @@ -83,11 +83,10 @@ def _treewalk(self, path): self.logger.debug("Ignoring %s (symlink)", fullpath) continue if isdir(fullpath): - if fullpath in self.exclude_paths or is_hidden(fullpath): - if is_hidden(fullpath): - self.logger.debug("Ignoring %s (hidden)", fullpath) - else: - self.logger.debug("Ignoring %s (excluded)", fullpath) + if fullpath in self.exclude_paths: + self.logger.debug("Ignoring %s (excluded)", fullpath) + elif is_hidden(fullpath): + self.logger.debug("Ignoring %s (hidden)", fullpath) else: empty = True for item in self._treewalk(fullpath): @@ -101,7 +100,12 @@ def _treewalk(self, path): self.logger.debug("Ignoring %s (filter)", fullpath) -if os.name== 'nt': +if os.environ.get("CODEQL_EXTRACTOR_PYTHON_OPTION_SKIP_HIDDEN_DIRECTORIES", "false") == "false": + + def is_hidden(path): + return False + +elif os.name== 'nt': import ctypes def is_hidden(path): diff --git a/python/ql/lib/change-notes/2025-04-30-extract-hidden-files-by-default.md b/python/ql/lib/change-notes/2025-04-30-extract-hidden-files-by-default.md new file mode 100644 index 000000000000..96372513499f --- /dev/null +++ b/python/ql/lib/change-notes/2025-04-30-extract-hidden-files-by-default.md @@ -0,0 +1,5 @@ +--- +category: minorAnalysis +--- + +- The Python extractor now extracts files in hidden directories by default. A new extractor option, `skip_hidden_directories`, has been added as well. Setting it to `true` will make the extractor revert to the old behavior. 
diff --git a/python/ql/test/2/extractor-tests/hidden/test.expected b/python/ql/test/2/extractor-tests/hidden/test.expected index ca72363d8f02..21bd0dfb2dd9 100644 --- a/python/ql/test/2/extractor-tests/hidden/test.expected +++ b/python/ql/test/2/extractor-tests/hidden/test.expected @@ -1,3 +1,5 @@ +| .hidden/inner/test.py | +| .hidden/module.py | | folder/module.py | | package | | package/__init__.py | diff --git a/python/ql/test/extractor-tests/filter-option/Test.expected b/python/ql/test/extractor-tests/filter-option/Test.expected index 7ade39a5998c..56b1e36c2a93 100644 --- a/python/ql/test/extractor-tests/filter-option/Test.expected +++ b/python/ql/test/extractor-tests/filter-option/Test.expected @@ -3,3 +3,4 @@ | Module foo.bar | | Module foo.include_test | | Package foo | +| Script hidden_foo.py |