From 21223b28e17613ea39214db877aec7060aac6bea Mon Sep 17 00:00:00 2001 From: willieyz Date: Fri, 17 Oct 2025 14:14:23 +0800 Subject: [PATCH 1/4] Port `simpasm` from `mlkem-native` to `mldsa-native` - This commit ports the `simpasm` script and related functions in `autogen` from `mlkem-native` to `mldsa-native`. - Added the `simpasm` job in `base.yml` and changed the `nix-shell` from `ci` to `ci-cross` in `ci.yml` for ASM simplification. - Since the CFI scripts have not yet been ported to `mldsa-native`, the `--cfify` argument has been removed temporary. A TODO comment has been added and will be restored once the CFI scripts are ported. Signed-off-by: willieyz --- .github/workflows/base.yml | 28 +++ .github/workflows/ci.yml | 5 +- scripts/autogen | 267 ++++++++++++++++++++++ scripts/simpasm | 446 +++++++++++++++++++++++++++++++++++++ 4 files changed, 744 insertions(+), 2 deletions(-) create mode 100755 scripts/simpasm diff --git a/.github/workflows/base.yml b/.github/workflows/base.yml index 3ca4a7e62..3e50cc892 100644 --- a/.github/workflows/base.yml +++ b/.github/workflows/base.yml @@ -218,6 +218,34 @@ jobs: - name: make lib run: | make lib + simpasm: + strategy: + fail-fast: false + matrix: + backend: + - arg: '--aarch64-clean' + name: Clean + # TODO: add backend option after we have optimized/clean seperation + # - arg: '' + # name: Optimized + simplify: + - arg: '' + name: Simplified + - arg: '--no-simplify' + name: Unmodified + runs-on: pqcp-arm64 + name: AArch64 dev backend (${{ matrix.simplify.name }}) + steps: + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Reinstate and test backend + uses: ./.github/actions/setup-shell + with: + nix-shell: 'ci' + gh_token: ${{ secrets.GITHUB_TOKEN }} + script: | + ./scripts/autogen ${{ matrix.simplify.arg }} + make clean + OPT=1 make quickcheck scan-build: strategy: fail-fast: false diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 48c2d5f7d..030ad3821 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -461,7 +461,8 @@ jobs: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - uses: ./.github/actions/setup-shell with: - nix-shell: 'ci' + nix-shell: 'ci-cross' # Need cross-compiler for ASM simplification + nix-cache: 'true' gh_token: ${{ secrets.GITHUB_TOKEN }} script: | - python3 ./scripts/autogen --dry-run + python3 ./scripts/autogen --dry-run --force-cross diff --git a/scripts/autogen b/scripts/autogen index 3654cb9a6..6c8e4aef0 100755 --- a/scripts/autogen +++ b/scripts/autogen @@ -1244,6 +1244,252 @@ class BibliographyEntry: return authors +def update_via_simpasm( + infile_full, + outdir, + outfile=None, + cflags=None, + preserve_header=True, + dry_run=False, + force_cross=False, +): + status_update("simpasm", infile_full) + + _, infile = os.path.split(infile_full) + if outfile is None: + outfile = infile + outfile_full = os.path.join(outdir, outfile) + + # Check if we need to use a cross-compiler + if "aarch64" in infile_full: + source_arch = "aarch64" + elif "x86_64" in infile_full: + source_arch = "x86_64" + else: + raise Exception(f"Could not detect architecture of source file {infile_full}.") + # Check native architecture + if platform.machine().lower() in ["arm64", "aarch64"]: + native_arch = "aarch64" + else: + native_arch = "x86_64" + + if native_arch != source_arch: + cross_prefix = f"{source_arch}-unknown-linux-gnu-" + cross_gcc = cross_prefix + "gcc" + # Check if cross-compiler is present + if shutil.which(cross_gcc) is None: + if force_cross is False: + return + raise Exception(f"Could not find cross toolchain {cross_prefix}") + else: + cross_prefix = None + + with tempfile.NamedTemporaryFile(suffix=".S") as tmp: + try: + # Determine architecture from filename + arch = "aarch64" if "aarch64" in infile_full else "x86_64" + + # TODO: Temporary remvoe the "--cfify", add back when CFI script added. + cmd = [ + "./scripts/simpasm", + "--objdump=llvm-objdump", + # "--cfify", + "--arch=" + arch, + "-i", + infile_full, + "-o", + tmp.name, + ] + if cross_prefix is not None: + # Stick with llvm-objdump for disassembly + cmd += ["--cc", cross_prefix + "gcc"] + cmd += ["--nm", cross_prefix + "nm"] + if cflags is not None: + cmd += [f'--cflags="{cflags}"'] + if preserve_header is True: + cmd += ["-p"] + r = subprocess.run( + cmd, + stdout=subprocess.DEVNULL, + stderr=subprocess.PIPE, + check=True, + text=True, + ) + except subprocess.CalledProcessError as e: + print(f"Command failed: {' '.join(cmd)}") + print(f"Exit code: {e.returncode}") + print(f"stderr: {e.stderr}") + raise Exception("Failed to run simpasm") from e + tmp.seek(0) + new_contents = tmp.read().decode() + + update_file(outfile_full, new_contents, dry_run=dry_run) + + +def update_via_copy(infile_full, outfile_full, dry_run=False, transform=None): + status_update("copy", f"{infile_full} -> {outfile_full}") + + with open(infile_full, "r") as f: + content = f.read() + + if transform is not None: + content = transform(content) + + update_file(outfile_full, content, dry_run=dry_run) + + +def update_via_remove(filename, dry_run=False): + if dry_run is True: + print( + f"Autogenerated file {filename} needs removing. Have you called scripts/autogen?", + file=sys.stderr, + ) + exit(1) + + # Remove the file + os.remove(filename) + + +def synchronize_file( + f, in_dir, out_dir, dry_run=False, delete=False, no_simplify=False, **kwargs +): + + # Only synchronize sources, but not README.md, Makefile and so on + extensions = (".c", ".h", ".i", ".inc", ".S") + + if not f.endswith(extensions): + return None + + basename = os.path.basename(f) + + if delete is True: + return basename + + if no_simplify is False and f.endswith(".S"): + update_via_simpasm(f, out_dir, dry_run=dry_run, **kwargs) + else: + # Update via copy + _, infile = os.path.split(f) + outfile_full = os.path.join(out_dir, infile) + # The header guards will also be checked later, but if we + # don't do it here, the dry-run would fail because of a + # mismatching intermediate file + if f.endswith(".h"): + transform = lambda c: adjust_header_guard_for_filename(c, outfile_full) + else: + transform = None + update_via_copy(f, outfile_full, dry_run=dry_run, transform=transform) + + return basename + + +def synchronize_backend( + in_dir, out_dir, dry_run=False, delete=False, no_simplify=False, **kwargs +): + copied = [] + + with ThreadPoolExecutor() as executor: + pool_results = list( + executor.map( + partial( + synchronize_file, + in_dir=in_dir, + out_dir=out_dir, + dry_run=dry_run, + delete=delete, + no_simplify=no_simplify, + **kwargs, + ), + get_files(os.path.join(in_dir, "*")), + ) + ) + + copied = [r for r in pool_results if r is not None] + + if delete is False: + return + + # Check for files in the target directory that have not been copied + for f in get_files(os.path.join(out_dir, "*")): + if os.path.basename(f) in copied: + continue + # Otherwise, remove it + update_via_remove(f, dry_run=dry_run) + + +def synchronize_backends( + *, dry_run=False, force_cross=False, clean=False, delete=False, no_simplify=False +): + if clean is False: + ty = "opt" + else: + ty = "clean" + + if delete is False: + # We may switch the AArch64 arithmetic backend, so adjust the metadata file + update_via_copy( + f"dev/aarch64_{ty}/meta.h", + "mldsa/native/aarch64/meta.h", + dry_run=dry_run, + transform=lambda c: adjust_header_guard_for_filename( + c, "mldsa/native/aarch64/meta.h" + ), + ) + + update_via_copy( + f"dev/x86_64/meta.h", + "mldsa/native/x86_64/meta.h", + dry_run=dry_run, + transform=lambda c: adjust_header_guard_for_filename( + c, "mldsa/native/x86_64/meta.h" + ), + ) + + synchronize_backend( + f"dev/aarch64_{ty}/src", + "mldsa/native/aarch64/src", + dry_run=dry_run, + delete=delete, + force_cross=force_cross, + no_simplify=no_simplify, + cflags="-Imldsa/native/aarch64/src", + ) + synchronize_backend( + "dev/fips202/aarch64/src", + "mldsa/fips202/native/aarch64/src", + dry_run=dry_run, + delete=delete, + force_cross=force_cross, + no_simplify=no_simplify, + cflags="-Imldsa/fips202/native/aarch64/src -march=armv8.4-a+sha3", + ) + synchronize_backend( + "dev/fips202/aarch64", + "mldsa/fips202/native/aarch64", + dry_run=dry_run, + delete=delete, + force_cross=force_cross, + no_simplify=no_simplify, + cflags="-Imldsa/fips202/native/aarch64 -march=armv8.4-a+sha3", + ) + synchronize_backend( + "dev/x86_64/src", + "mldsa/native/x86_64/src", + dry_run=dry_run, + delete=delete, + force_cross=force_cross, + no_simplify=no_simplify, + # Turn off control-flow protection (CET) explicitly. Newer versions of + # clang turn it on by default and insert endbr64 instructions at every + # global symbol. + # We insert endbr64 instruction manually via the MLD_ASM_FN_SYMBOL + # macro. + # This leads to duplicate endbr64 instructions causing a failure when + # comparing the object code before and after simplification. + cflags="-Imldsa/native/x86_64/src/ -mavx2 -mbmi2 -msse4 -fcf-protection=none", + ) + + def gen_markdown_citations_for(filename, bibliography, dry_run=False): # Skip BIBLIOGRAPHY.md @@ -1473,6 +1719,9 @@ def _main(): formatter_class=argparse.ArgumentDefaultsHelpFormatter ) parser.add_argument("--dry-run", default=False, action="store_true") + parser.add_argument("--aarch64-clean", default=True, action="store_true") + parser.add_argument("--no-simplify", default=False, action="store_true") + parser.add_argument("--force-cross", default=False, action="store_true") args = parser.parse_args() @@ -1489,11 +1738,29 @@ def _main(): gen_avx2_zeta_file(args.dry_run) gen_avx2_rej_uniform_table(args.dry_run) high_level_status("Generated zeta and lookup tables") + + synchronize_backends( + dry_run=args.dry_run, + clean=args.aarch64_clean, + no_simplify=args.no_simplify, + force_cross=args.force_cross, + ) + high_level_status("Synchronized backends") + gen_header_guards(args.dry_run) high_level_status("Generated header guards") gen_preprocessor_comments(args.dry_run) high_level_status("Generated preprocessor comments") + synchronize_backends( + dry_run=args.dry_run, + clean=args.aarch64_clean, + delete=True, + force_cross=args.force_cross, + no_simplify=args.no_simplify, + ) + high_level_status("Completed final backend synchronization") + if __name__ == "__main__": _main() diff --git a/scripts/simpasm b/scripts/simpasm new file mode 100755 index 000000000..1826711ad --- /dev/null +++ b/scripts/simpasm @@ -0,0 +1,446 @@ +#!/usr/bin/env python3 +# Copyright (c) The mlkem-native project authors +# Copyright (c) The mldsa-native project authors +# SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + +import subprocess +import argparse +import logging +import pathlib +import tempfile +import platform +import sys +import os +import re + + +def patchup_disasm(asm, cfify=False): + asm = asm.split("\n") + indentation = 8 + + def decode_label(asm_line): + r = re.search(r"^\s*[0-9a-fA-F]+\s*<([a-zA-Z0-9_]+)>:\s*$", asm_line) + if r is None: + return None + return r.group(1) + + def make_label(lbl): + if cfify: + return "L" + lbl + ":" + return lbl + ":" + + # Find first label + for i, l in enumerate(asm): + if decode_label(l) is not None: + break + + asm = asm[i + 1 :] + + def gen(asm): + for l in asm: + if l.strip() == "": + yield "" + continue + lbl = decode_label(l) + # Re-format labels as assembly labels + if lbl is not None: + yield make_label(lbl) + continue + # Drop comments + l = l.split(";")[0] + # Re-format references to labels + # Those are assumed to have the form `0xDEADBEEF