From ea09657dc350105bef5756fb8ae76b2db230b807 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Thu, 7 May 2026 10:41:45 +0000 Subject: [PATCH] [MINOR][INFRA] Ignore AGENTS.md and CONTRIBUTING.md in determine_modules_for_files ### What changes were proposed in this pull request? This PR extends `determine_modules_for_files` in `dev/sparktestsupport/utils.py` to ignore `AGENTS.md` and `CONTRIBUTING.md` in addition to the existing `README.md`. ### Why are the changes needed? A documentation-only PR that touches only `AGENTS.md` (e.g. https://github.com/apache/spark/pull/55707) currently triggers all CI test jobs because the file is not associated with any submodule, so it falls through to the `root` module. Neither file affects code or tests, and neither is consumed by the docs build, so they should be ignored just like `README.md`. ### Does this PR introduce _any_ user-facing change? No, this is only a testing infra change. ### How was this patch tested? Updated and ran the doctests in `dev/sparktestsupport/utils.py`. ### Was this patch authored or co-authored using generative AI tooling? Generated-by: Claude Opus 4.7 --- dev/sparktestsupport/utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dev/sparktestsupport/utils.py b/dev/sparktestsupport/utils.py index a66c0e70cb567..b969b96a16c52 100755 --- a/dev/sparktestsupport/utils.py +++ b/dev/sparktestsupport/utils.py @@ -34,7 +34,7 @@ def determine_modules_for_files(filenames): Given a list of filenames, return the set of modules that contain those files. If a file is not associated with a more specific submodule, then this method will consider that file to belong to the 'root' module. `.github` directory is counted only in GitHub Actions, - and `README.md` is always ignored. + and `README.md`, `AGENTS.md`, `CONTRIBUTING.md` are always ignored. >>> sorted(x.name for x in determine_modules_for_files(["python/pyspark/a.py", "sql/core/foo"])) ['pyspark-core', 'pyspark-errors', 'sql'] @@ -42,10 +42,14 @@ def determine_modules_for_files(filenames): ['root'] >>> [x.name for x in determine_modules_for_files(["sql/README.md"])] [] + >>> [x.name for x in determine_modules_for_files(["AGENTS.md"])] + [] + >>> [x.name for x in determine_modules_for_files(["CONTRIBUTING.md"])] + [] """ changed_modules = set() for filename in filenames: - if filename.endswith("README.md"): + if filename.endswith(("README.md", "AGENTS.md", "CONTRIBUTING.md")): continue if filename in ( "scalastyle-config.xml",