diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 93d25da6..d7bb250c 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -6,6 +6,10 @@ Follow the repository's automated style configuration in - Keep changes consistent with existing Polaris patterns. - For Python, follow the path-specific instructions in `.github/instructions/python.instructions.md`. +- Use the repository's Pixi environment before considering any other Python + environment. Prefer `pixi run -e py314 ` for `python`, `pytest`, + `pre-commit`, `ruff`, and `mypy`, and check `.pixi/` plus `pixi.toml` + before concluding a tool is missing. - pre-commit on changed files is required before finishing; if sandboxed execution fails, request escalation and do not close the task until it has run or the user declines. diff --git a/.github/workflows/copilot-setup-steps.yml b/.github/workflows/copilot-setup-steps.yml index c7809009..e505c3a5 100644 --- a/.github/workflows/copilot-setup-steps.yml +++ b/.github/workflows/copilot-setup-steps.yml @@ -38,4 +38,6 @@ jobs: - name: Verify the agent environment run: | pixi run -e py314 python --version + pixi run -e py314 pytest --version + pixi run -e py314 pre-commit --version pixi run -e py314 mache --help diff --git a/AGENTS.md b/AGENTS.md index 729cb4c8..c4b5b50e 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -10,6 +10,18 @@ These instructions apply to the whole repository unless a deeper - If an instruction here conflicts with automated tooling, follow the automated tooling. +## Python environment + +- This repository uses Pixi as the primary development environment manager. +- Check the root `.pixi/` and `pixi.toml` before creating or selecting any + other Python environment. +- Prefer `pixi run -e py314 ` for Python tools such as `python`, + `pytest`, `pre-commit`, `ruff`, and `mypy`. +- If you need an interactive shell, use `pixi shell -e py314` from the repo + root instead of creating a new virtual environment. +- Do not treat `pytest: command not found` in a plain shell as a missing + dependency until you have tried the command through Pixi. + ## Python style - Keep Python lines at 79 characters or fewer whenever possible. @@ -28,7 +40,12 @@ These instructions apply to the whole repository unless a deeper ## Validation +- Run tests and linting through Pixi unless the task explicitly requires a + different environment. +- Prefer `pixi run -e py314 pytest` for tests. - pre-commit on changed files is required before finishing; if sandboxed execution fails, request escalation and do not close the task until it has run or the user declines. +- Prefer `pixi run -e py314 pre-commit run --files ...` for required + validation. - Prefer fixing lint and formatting issues rather than suppressing them. diff --git a/docs/developers_guide/config_machines_updates.md b/docs/developers_guide/config_machines_updates.md index ef0d508f..91b76f32 100644 --- a/docs/developers_guide/config_machines_updates.md +++ b/docs/developers_guide/config_machines_updates.md @@ -89,9 +89,9 @@ The update path is: 2. The workflow creates or refreshes a GitHub issue. 3. Copilot is assigned to that issue. 4. Copilot opens a pull request against `main`. -5. That PR updates `mache/cime_machine_config/config_machines.xml` first, then - any related Spack templates or version strings that the report indicates - should be reviewed. +5. That PR replaces `mache/cime_machine_config/config_machines.xml` with the + latest version from `E3SM-Project/E3SM`, then updates any related Spack + templates or version strings that the report indicates should be reviewed. 6. A maintainer reviews and merges the PR. 7. The next daily run compares the merged repository state against upstream again. @@ -112,8 +112,16 @@ Copilot receives instructions from two places. `agent_assignment` payload: - Use the issue body as the task definition. -- Update `config_machines.xml` first. -- Then update related Spack templates and version strings. +- Run `pixi run -e py314 python utils/update_cime_machine_config.py + --work-dir .`. +- Replace `mache/cime_machine_config/config_machines.xml` with the generated + `upstream_config_machines.xml`, then remove that temporary file before + committing. +- State the upstream E3SM commit hash in the PR summary. +- Then update related Spack templates and version strings under + `mache/spack/templates/*.yaml`, + `mache/spack/templates/*.sh`, and + `mache/spack/templates/*.csh`. - Add TODO comments in the PR when prefix or path changes need reviewer confirmation. @@ -123,6 +131,7 @@ Copilot receives instructions from two places. drift and includes: - the timestamp and upstream source URL, +- the upstream revision when it could be resolved from GitHub, - the workflow run URL, - the list of affected supported machines, - the required work list, @@ -131,9 +140,16 @@ drift and includes: The required work section tells Copilot to: -- update `mache/cime_machine_config/config_machines.xml` for the affected - supported machines, -- update Spack templates and version strings when module or environment drift +- run `pixi run -e py314 python utils/update_cime_machine_config.py + --work-dir .`, +- replace `mache/cime_machine_config/config_machines.xml` with the generated + `upstream_config_machines.xml`, +- remove `upstream_config_machines.xml` before committing, +- state the upstream E3SM commit hash in the PR summary, +- update Spack templates and version strings in + `mache/spack/templates/*.yaml`, + `mache/spack/templates/*.sh`, and + `mache/spack/templates/*.csh` when module or environment drift implies different package versions, - keep the PR focused when the change is only version or module drift, - add a TODO in the PR instead of guessing when a new prefix or path is not @@ -173,13 +189,17 @@ in this order. ### 1. `config_machines.xml` changes -Verify that the PR updates -`mache/cime_machine_config/config_machines.xml` only for supported machines -reported by the workflow, and that those changes match the current upstream -E3SM machine definitions. +Verify that the PR replaces +`mache/cime_machine_config/config_machines.xml` with the current upstream file +from `E3SM-Project/E3SM`, rather than hand-editing only selected machine +blocks. + +The supported-machine report from the workflow tells you which machine entries +caused the drift and which sections deserve the closest review. In practice, the easiest cross-check is to compare the PR against the report -artifact from the workflow run that opened or refreshed the issue. +artifact and the upstream XML source from the workflow run that opened or +refreshed the issue. ### 2. Related Spack updates diff --git a/mache/cime_machine_config/report.py b/mache/cime_machine_config/report.py index 1a67b22c..bfd57bc3 100644 --- a/mache/cime_machine_config/report.py +++ b/mache/cime_machine_config/report.py @@ -51,6 +51,7 @@ def has_updates(self) -> bool: class UpdateReport: generated_at: str upstream_url: str + upstream_revision: str | None supported_machines: list[str] machines: list[MachineUpdate] @@ -64,7 +65,13 @@ def to_dict(self) -> dict[str, object]: return data -def build_update_report(old_xml, new_xml, supported_machines, upstream_url): +def build_update_report( + old_xml, + new_xml, + supported_machines, + upstream_url, + upstream_revision=None, +): """Build a structured report for supported machine config changes.""" old_root = etree.parse(old_xml).getroot() @@ -87,6 +94,7 @@ def build_update_report(old_xml, new_xml, supported_machines, upstream_url): return UpdateReport( generated_at=datetime.now(timezone.utc).isoformat(), upstream_url=upstream_url, + upstream_revision=upstream_revision, supported_machines=list(supported_machines), machines=machines, ) @@ -113,6 +121,9 @@ def render_update_issue(report, run_url=None): f'- Upstream source: {report.upstream_url}', ] + if report.upstream_revision is not None: + lines.append(f'- Upstream revision: {report.upstream_revision}') + if run_url: lines.append(f'- Workflow run: {run_url}') @@ -121,14 +132,24 @@ def render_update_issue(report, run_url=None): '', 'Required work:', '', - '- Update mache/cime_machine_config/config_machines.xml for the', - ' affected supported machines.', + '- Run `pixi run -e py314 python ' + 'utils/update_cime_machine_config.py --work-dir .` from the', + ' repository root.', + '- Replace mache/cime_machine_config/config_machines.xml with', + ' upstream_config_machines.xml generated by that command, then', + ' remove upstream_config_machines.xml before committing.', '- If module or environment changes imply different', ' system-package', ' versions, update the corresponding mache Spack templates and', ' version strings for the affected toolchains.', + '- Look for related Spack template files under', + ' mache/spack/templates/*.yaml,', + ' mache/spack/templates/*.sh, and', + ' mache/spack/templates/*.csh.', '- If the only follow-up is module or version drift, keep the PR', ' focused on those updates.', + '- In the PR summary, state which upstream E3SM commit hash was', + ' used for the config_machines.xml replacement.', '- If any prefix or path values changed and the correct', ' replacement', ' is not obvious, add a TODO comment in the PR for the reviewer', @@ -350,6 +371,12 @@ def _render_machine_section(machine): '- Spack templates to review: ' f'{", ".join(machine.spack_templates_to_review)}' ) + lines.append( + '- Spack template globs: ' + f'mache/spack/templates/{machine.machine}*.yaml, ' + f'mache/spack/templates/{machine.machine}*.sh, ' + f'mache/spack/templates/{machine.machine}*.csh' + ) else: lines.append('- Spack templates to review: none matched') diff --git a/tests/test_cime_machine_config_report.py b/tests/test_cime_machine_config_report.py index a0702d86..45b25f4a 100644 --- a/tests/test_cime_machine_config_report.py +++ b/tests/test_cime_machine_config_report.py @@ -50,6 +50,7 @@ def test_build_update_report_detects_spack_related_drift( new_xml=new_xml, supported_machines=['test-machine'], upstream_url='https://example.invalid/config_machines.xml', + upstream_revision='deadbeef1234', ) assert report.has_updates is True @@ -97,13 +98,22 @@ def test_render_update_issue_includes_required_instructions( new_xml=new_xml, supported_machines=['test-machine'], upstream_url='https://example.invalid/config_machines.xml', + upstream_revision='deadbeef1234', ) markdown = render_update_issue( report, run_url='https://github.example/actions/runs/1', ) - assert 'Update mache/cime_machine_config/config_machines.xml' in markdown + assert ( + 'pixi run -e py314 python utils/update_cime_machine_config.py' + in markdown + ) + assert 'upstream_config_machines.xml' in markdown + assert 'deadbeef1234' in markdown + assert 'state which upstream E3SM commit hash was' in markdown + assert 'mache/spack/templates/*.yaml' in markdown + assert 'mache/spack/templates/test-machine*.sh' in markdown assert 'TODO comment in the PR for the reviewer' in markdown assert 'test-machine_gnu_mpich.yaml' in markdown assert 'https://github.example/actions/runs/1' in markdown diff --git a/tests/test_update_cime_machine_config.py b/tests/test_update_cime_machine_config.py new file mode 100644 index 00000000..c9e99a51 --- /dev/null +++ b/tests/test_update_cime_machine_config.py @@ -0,0 +1,69 @@ +from importlib.util import module_from_spec, spec_from_file_location +from pathlib import Path + + +def _load_update_module(): + module_path = ( + Path(__file__).resolve().parents[1] + / 'utils' + / 'update_cime_machine_config.py' + ) + spec = spec_from_file_location('update_cime_machine_config', module_path) + assert spec is not None + assert spec.loader is not None + + module = module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +def test_parse_github_raw_url_supports_default_e3sm_url(): + update_module = _load_update_module() + + source = update_module._parse_github_raw_url( + 'https://raw.githubusercontent.com/E3SM-Project/E3SM/' + 'refs/heads/master/cime_config/machines/config_machines.xml' + ) + + assert source == ( + 'E3SM-Project', + 'E3SM', + 'master', + 'cime_config/machines/config_machines.xml', + ) + + +def test_get_latest_commit_sha_uses_github_commits_api(monkeypatch): + update_module = _load_update_module() + captured = {} + + class FakeResponse: + def raise_for_status(self): + return None + + def json(self): + return [{'sha': 'abc123'}] + + def fake_get(url, *, params, timeout): + captured['url'] = url + captured['params'] = params + captured['timeout'] = timeout + return FakeResponse() + + monkeypatch.setattr(update_module.requests, 'get', fake_get) + + sha = update_module._get_latest_commit_sha( + owner='E3SM-Project', + repo='E3SM', + ref='master', + path='cime_config/machines/config_machines.xml', + ) + + assert sha == 'abc123' + assert captured['url'].endswith('/repos/E3SM-Project/E3SM/commits') + assert captured['params'] == { + 'sha': 'master', + 'path': 'cime_config/machines/config_machines.xml', + 'per_page': 1, + } + assert captured['timeout'] == 60 diff --git a/utils/README.md b/utils/README.md index 582461d7..75f73198 100644 --- a/utils/README.md +++ b/utils/README.md @@ -6,14 +6,17 @@ mache package. ## update CIME machine config The `update_cime_machine_config.py` script can be used to download the latest -version of the `config_machines.xml` file from E3SM's `master` branch, then -compare it from the previous version stored in `mache` and show changes related -to supported machines. - -A developer can then copy `new_config_machines.xml` into -`mache/cime_machine_config/config_machines.xml` as part of a PR that makes -relevant updates. They should also make the changes associated with the -differences that this utility displays in the appropriate `mache/spack/templates` files. +version of the `config_machines.xml` file from `E3SM-Project/E3SM`, compare it +with the copy stored in `mache`, and show changes related to supported +machines. + +When drift is detected, the follow-up PR should replace +`mache/cime_machine_config/config_machines.xml` with the latest upstream file +from `E3SM-Project/E3SM`. This utility helps confirm the drift and review the +affected supported machines; it does not rewrite the repository copy in place. + +The PR should also make the changes associated with the differences that this +utility displays in the appropriate `mache/spack/templates` files. ## extract spack shell scripts from CIME machine config diff --git a/utils/manage_cime_machine_config_issue.py b/utils/manage_cime_machine_config_issue.py index 5e231164..df245549 100644 --- a/utils/manage_cime_machine_config_issue.py +++ b/utils/manage_cime_machine_config_issue.py @@ -197,10 +197,19 @@ def build_issue_payload( 'target_repo': f'{owner}/{repo}', 'base_branch': base_branch, 'custom_instructions': ( - 'Use the issue body as the task definition. Update ' - 'config_machines.xml first, then the related Spack ' - 'templates and version strings. Add TODO comments in the ' - 'PR when prefix or path changes need reviewer confirmation.' + 'Use the issue body as the task definition. Run `pixi run ' + '-e py314 python utils/update_cime_machine_config.py ' + '--work-dir .`, replace ' + 'mache/cime_machine_config/config_machines.xml with ' + 'upstream_config_machines.xml, remove ' + 'upstream_config_machines.xml before committing, and state ' + 'the upstream E3SM commit hash in the PR summary. Then ' + 'update the related Spack templates and version strings in ' + 'mache/spack/templates/*.yaml, ' + 'mache/spack/templates/*.sh, and ' + 'mache/spack/templates/*.csh. ' + 'Add TODO comments in the PR when prefix or path changes ' + 'need reviewer confirmation.' ), 'custom_agent': '', 'model': '', diff --git a/utils/update_cime_machine_config.py b/utils/update_cime_machine_config.py index c2d47dda..2f065a81 100755 --- a/utils/update_cime_machine_config.py +++ b/utils/update_cime_machine_config.py @@ -3,6 +3,9 @@ import argparse import os import tempfile +from urllib.parse import urlparse + +import requests import mache.version from mache.cime_machine_config.report import ( @@ -13,6 +16,12 @@ from mache.io import download_file from mache.machines import get_supported_machines +DEFAULT_UPSTREAM_URL = ( + 'https://raw.githubusercontent.com/E3SM-Project/E3SM/' + 'refs/heads/master/cime_config/machines/config_machines.xml' +) +GITHUB_API_URL = 'https://api.github.com' + def main(): """ @@ -33,10 +42,7 @@ def main(): ) parser.add_argument( '--upstream-url', - default=( - 'https://raw.githubusercontent.com/E3SM-Project/E3SM/' - 'refs/heads/master/cime_config/machines/config_machines.xml' - ), + default=DEFAULT_UPSTREAM_URL, help='The upstream config_machines.xml URL to compare against.', ) parser.add_argument( @@ -77,15 +83,19 @@ def main(): def build_report(*, upstream_url, work_dir=None): """Download upstream config and return a structured drift report.""" + upstream_revision = _resolve_upstream_revision(upstream_url) + if work_dir is not None: return _build_report_in_dir( upstream_url=upstream_url, + upstream_revision=upstream_revision, work_dir=work_dir, ) with tempfile.TemporaryDirectory() as temp_dir: return _build_report_in_dir( upstream_url=upstream_url, + upstream_revision=upstream_revision, work_dir=temp_dir, ) @@ -111,7 +121,7 @@ def print_console_report(report): print(f' spack templates: {templates}') -def _build_report_in_dir(*, upstream_url, work_dir): +def _build_report_in_dir(*, upstream_url, upstream_revision, work_dir): machines = get_supported_machines() upstream_filename = os.path.join(work_dir, 'upstream_config_machines.xml') old_filename = 'mache/cime_machine_config/config_machines.xml' @@ -122,7 +132,66 @@ def _build_report_in_dir(*, upstream_url, work_dir): new_xml=upstream_filename, supported_machines=machines, upstream_url=upstream_url, + upstream_revision=upstream_revision, + ) + + +def _resolve_upstream_revision(upstream_url): + github_source = _parse_github_raw_url(upstream_url) + if github_source is None: + return None + + owner, repo, ref, path = github_source + return _get_latest_commit_sha( + owner=owner, + repo=repo, + ref=ref, + path=path, + ) + + +def _parse_github_raw_url(upstream_url): + parsed = urlparse(upstream_url) + if parsed.netloc != 'raw.githubusercontent.com': + return None + + parts = [part for part in parsed.path.split('/') if part != ''] + if len(parts) < 7: + return None + + owner, repo = parts[0], parts[1] + if parts[2:4] != ['refs', 'heads']: + return None + + ref = parts[4] + path = '/'.join(parts[5:]) + if path == '': + return None + + return owner, repo, ref, path + + +def _get_latest_commit_sha(*, owner, repo, ref, path): + response = requests.get( + f'{GITHUB_API_URL}/repos/{owner}/{repo}/commits', + params={ + 'sha': ref, + 'path': path, + 'per_page': 1, + }, + timeout=60, ) + response.raise_for_status() + + payload = response.json() + if not isinstance(payload, list) or len(payload) == 0: + return None + + sha = payload[0].get('sha') + if not isinstance(sha, str) or sha == '': + return None + + return sha if __name__ == '__main__':