From 6be9c2dcea0915555a2101ed43ab5b5136f2cfc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jiri=20Dan=C4=9Bk?= Date: Wed, 11 Dec 2024 09:46:05 +0100 Subject: [PATCH] RHOAIENG-16517: chore(tests): add sandboxing so that Dockerfile builds can only access files we know they access --- .../workflows/build-notebooks-TEMPLATE.yaml | 7 ++ Makefile | 3 +- ci/cached-builds/gha_lvm_overlay.sh | 3 +- poetry.lock | 47 ++++++----- pyproject.toml | 1 + scripts/sandbox.py | 77 +++++++++++++++++++ scripts/sandbox_tests.py | 55 +++++++++++++ 7 files changed, 173 insertions(+), 20 deletions(-) create mode 100755 scripts/sandbox.py create mode 100755 scripts/sandbox_tests.py diff --git a/.github/workflows/build-notebooks-TEMPLATE.yaml b/.github/workflows/build-notebooks-TEMPLATE.yaml index d50962828..1344b393a 100644 --- a/.github/workflows/build-notebooks-TEMPLATE.yaml +++ b/.github/workflows/build-notebooks-TEMPLATE.yaml @@ -39,6 +39,13 @@ jobs: - uses: actions/checkout@v4 + - run: mkdir -p $TMPDIR + + # for bin/buildinputs in scripts/sandbox.py + - uses: actions/setup-go@v5 + with: + cache-dependency-path: "**/*.sum" + - name: Login to GitHub Container Registry uses: docker/login-action@v3 with: diff --git a/Makefile b/Makefile index 11ffd6b24..65c42d803 100644 --- a/Makefile +++ b/Makefile @@ -69,7 +69,8 @@ define build_image $(eval BUILD_ARGS := --build-arg BASE_IMAGE=$(BASE_IMAGE_NAME)), $(eval BUILD_ARGS :=) ) - $(CONTAINER_ENGINE) build $(CONTAINER_BUILD_CACHE_ARGS) --tag $(IMAGE_NAME) --file $(2)/Dockerfile $(BUILD_ARGS) $(ROOT_DIR)/ + $(ROOT_DIR)/scripts/sandbox.py --dockerfile '$(2)/Dockerfile' -- \ + $(CONTAINER_ENGINE) build $(CONTAINER_BUILD_CACHE_ARGS) --tag $(IMAGE_NAME) --file '$(2)/Dockerfile' $(BUILD_ARGS) {}\; endef # Push function for the notebok image: diff --git a/ci/cached-builds/gha_lvm_overlay.sh b/ci/cached-builds/gha_lvm_overlay.sh index 4d2f55520..d48151035 100755 --- a/ci/cached-builds/gha_lvm_overlay.sh +++ b/ci/cached-builds/gha_lvm_overlay.sh @@ -7,7 +7,8 @@ set -Eeuo pipefail # This script creates file-backed volumes on /dev/root and /dev/sda1, then creates ext4 over both, and mounts it for our use # https://github.com/easimon/maximize-build-space/blob/master/action.yml -root_reserve_mb=2048 +# root_reserve_mb=2048 was running out of disk space building cuda images +root_reserve_mb=4096 temp_reserve_mb=100 swap_size_mb=4096 diff --git a/poetry.lock b/poetry.lock index fcf541bac..88d4fcd77 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,23 +1,23 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. [[package]] name = "attrs" -version = "23.2.0" +version = "24.2.0" description = "Classes Without Boilerplate" optional = false python-versions = ">=3.7" files = [ - {file = "attrs-23.2.0-py3-none-any.whl", hash = "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1"}, - {file = "attrs-23.2.0.tar.gz", hash = "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30"}, + {file = "attrs-24.2.0-py3-none-any.whl", hash = "sha256:81921eb96de3191c8258c199618104dd27ac608d9366f5e35d011eae1867ede2"}, + {file = "attrs-24.2.0.tar.gz", hash = "sha256:5cfb1b9148b5b086569baec03f20d7b6bf3bcacc9a42bebf87ffaaca362f6346"}, ] [package.extras] -cov = ["attrs[tests]", "coverage[toml] (>=5.3)"] -dev = ["attrs[tests]", "pre-commit"] -docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"] -tests = ["attrs[tests-no-zope]", "zope-interface"] -tests-mypy = ["mypy (>=1.6)", "pytest-mypy-plugins"] -tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"] +benchmark = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +cov = ["cloudpickle", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +dev = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier (<24.7)"] +tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] [[package]] name = "colorama" @@ -43,13 +43,13 @@ files = [ [[package]] name = "packaging" -version = "24.1" +version = "24.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"}, - {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, + {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, + {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, ] [[package]] @@ -67,22 +67,33 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "pyfakefs" +version = "5.7.2" +description = "pyfakefs implements a fake file system that mocks the Python file system modules." +optional = false +python-versions = ">=3.7" +files = [ + {file = "pyfakefs-5.7.2-py3-none-any.whl", hash = "sha256:e1527b0e8e4b33be52f0b024ca1deb269c73eecd68457c6b0bf608d6dab12ebd"}, + {file = "pyfakefs-5.7.2.tar.gz", hash = "sha256:40da84175c5af8d9c4f3b31800b8edc4af1e74a212671dd658b21cc881c60000"}, +] + [[package]] name = "pytest" -version = "8.2.2" +version = "8.3.4" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.8" files = [ - {file = "pytest-8.2.2-py3-none-any.whl", hash = "sha256:c434598117762e2bd304e526244f67bf66bbd7b5d6cf22138be51ff661980343"}, - {file = "pytest-8.2.2.tar.gz", hash = "sha256:de4bb8104e201939ccdc688b27a89a7be2079b22e2bd2b07f806b6ba71117977"}, + {file = "pytest-8.3.4-py3-none-any.whl", hash = "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6"}, + {file = "pytest-8.3.4.tar.gz", hash = "sha256:965370d062bce11e73868e0335abac31b4d3de0e82f4007408d242b4f8610761"}, ] [package.dependencies] colorama = {version = "*", markers = "sys_platform == \"win32\""} iniconfig = "*" packaging = "*" -pluggy = ">=1.5,<2.0" +pluggy = ">=1.5,<2" [package.extras] dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] @@ -105,4 +116,4 @@ pytest = ">=7.0" [metadata] lock-version = "2.0" python-versions = "~3.12" -content-hash = "b083c8ea5b4dc42a2c76a0c2b51af47f8813ba5f40929bafa3f370f3290d971f" +content-hash = "b3040f243a10b6a64e04b687a322b7ab735d82fefc4dc45d8177c2c4f286e049" diff --git a/pyproject.toml b/pyproject.toml index 87cd2fc69..0873cd45b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,7 @@ python = "~3.12" [tool.poetry.group.dev.dependencies] pytest = "^8.2.2" pytest-subtests = "^0.12.1" +pyfakefs = "^5.7.2" [build-system] requires = ["poetry-core"] diff --git a/scripts/sandbox.py b/scripts/sandbox.py new file mode 100755 index 000000000..352723b63 --- /dev/null +++ b/scripts/sandbox.py @@ -0,0 +1,77 @@ +#! /usr/bin/env python3 + +import argparse +import logging +import pathlib +import shutil +import subprocess +import sys +import tempfile +import json +from typing import cast + +ROOT_DIR = pathlib.Path(__file__).parent.parent +MAKE = shutil.which("gmake") or shutil.which("make") + +logging.basicConfig() +logging.root.name = pathlib.Path(__file__).name + + +class Args(argparse.Namespace): + dockerfile: pathlib.Path + remaining: list[str] + + +def main() -> int: + p = argparse.ArgumentParser(allow_abbrev=False) + p.add_argument("--dockerfile", type=pathlib.Path, required=True) + p.add_argument('remaining', nargs=argparse.REMAINDER) + + args = cast(Args, p.parse_args()) + + print(f"{__file__=} started with {args=}") + + if not args.remaining or args.remaining[0] != "--": + print("must specify command to execute after double dashes at the end, such as `-- command --args ...`") + return 1 + if not "{};" in args.remaining: + print("must give a `{};` parameter that will be replaced with new build context") + return 1 + + if not (ROOT_DIR / "bin/buildinputs").exists(): + subprocess.check_call([MAKE, "bin/buildinputs"], cwd=ROOT_DIR) + stdout = subprocess.check_output([ROOT_DIR / "bin/buildinputs", str(args.dockerfile)], + text=True, cwd=ROOT_DIR) + prereqs = [pathlib.Path(file) for file in json.loads(stdout)] if stdout != "\n" else [] + print(f"{prereqs=}") + + with tempfile.TemporaryDirectory(delete=True) as tmpdir: + setup_sandbox(prereqs, pathlib.Path(tmpdir)) + command = [arg if arg != "{};" else tmpdir for arg in args.remaining[1:]] + print(f"running {command=}") + try: + subprocess.check_call(command) + except subprocess.CalledProcessError as err: + logging.error("Failed to execute process, see errors logged above ^^^") + return err.returncode + return 0 + + +def setup_sandbox(prereqs: list[pathlib.Path], tmpdir: pathlib.Path): + # always adding .gitignore + gitignore = ROOT_DIR / ".gitignore" + if gitignore.exists(): + shutil.copy(gitignore, tmpdir) + + for dep in prereqs: + if dep.is_absolute(): + dep = dep.relative_to(ROOT_DIR) + if dep.is_dir(): + shutil.copytree(dep, tmpdir / dep, symlinks=False, dirs_exist_ok=True) + else: + (tmpdir / dep.parent).mkdir(parents=True, exist_ok=True) + shutil.copy(dep, tmpdir / dep.parent) + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/scripts/sandbox_tests.py b/scripts/sandbox_tests.py new file mode 100755 index 000000000..383eab199 --- /dev/null +++ b/scripts/sandbox_tests.py @@ -0,0 +1,55 @@ +#! /usr/bin/env python3 + +import glob +import logging +import pathlib +import sys +import tempfile + +import pyfakefs.fake_filesystem + +from scripts.sandbox import setup_sandbox + +ROOT_DIR = pathlib.Path(__file__).parent.parent + +logging.basicConfig() +logging.root.name = pathlib.Path(__file__).name + +class TestSandbox: + def test_filesystem_file(self, fs: pyfakefs.fake_filesystem.FakeFilesystem): + pathlib.Path("a").write_text("a") + + with tempfile.TemporaryDirectory(delete=True) as tmpdir: + setup_sandbox([pathlib.Path("a")], pathlib.Path(tmpdir)) + assert (pathlib.Path(tmpdir) / "a").is_file() + + def test_filesystem_dir_with_file(self, fs: pyfakefs.fake_filesystem.FakeFilesystem): + pathlib.Path("a/").mkdir() + pathlib.Path("a/b").write_text("b") + + with tempfile.TemporaryDirectory(delete=True) as tmpdir: + setup_sandbox([pathlib.Path("a")], pathlib.Path(tmpdir)) + assert (pathlib.Path(tmpdir) / "a").is_dir() + assert (pathlib.Path(tmpdir) / "a" / "b").is_file() + + def test_filesystem_dir_with_dir_with_file(self, fs: pyfakefs.fake_filesystem.FakeFilesystem): + pathlib.Path("a/b").mkdir(parents=True) + pathlib.Path("a/b/c").write_text("c") + + with tempfile.TemporaryDirectory(delete=True) as tmpdir: + setup_sandbox([pathlib.Path("a")], pathlib.Path(tmpdir)) + assert (pathlib.Path(tmpdir) / "a").is_dir() + assert (pathlib.Path(tmpdir) / "a" / "b").is_dir() + assert (pathlib.Path(tmpdir) / "a" / "b" / "c").is_file() + + def test_filesystem_file_in_dir_in_dir(self, fs: pyfakefs.fake_filesystem.FakeFilesystem): + pathlib.Path("a/b").mkdir(parents=True) + pathlib.Path("a/b/c").write_text("c") + + with tempfile.TemporaryDirectory(delete=True) as tmpdir: + setup_sandbox([pathlib.Path("a/b/c")], pathlib.Path(tmpdir)) + for g in glob.glob("**/*", recursive=True): + logging.debug("%s", g) + assert (pathlib.Path(tmpdir) / "a").is_dir() + assert (pathlib.Path(tmpdir) / "a" / "b").is_dir() + assert (pathlib.Path(tmpdir) / "a" / "b" / "c").is_file()