diff --git a/docs/proposals/20221007-ir-state.md b/docs/proposals/20221007-ir-state.md new file mode 100644 index 000000000..facbc8f90 --- /dev/null +++ b/docs/proposals/20221007-ir-state.md @@ -0,0 +1,126 @@ +# Refactor the IR to use state + +Author: +- [kemingy](https://github.com/kemingy/) + + +## Summary + +This proposal is designed to refactor the IR, as we discussed in [envd/#91](https://github.com/tensorchord/envd/issues/91#issuecomment-1239405359). + +## Motivation + +* Currently, we build the image with a hard-coded order and parallelism. It's not flexible enough for some use cases. +* The design of the BuildKit LLB state is well suited to a method-chaining implementation. Users can define the dependencies and parallelism easily. + +## Goals + +* Be able to define the build order and parallelism. +* Provide low-level APIs to build the image from scratch. + +## API + +*highlight* +* the `build()` function needs to return a state +* a state can be started from + * `scratch()` + * `merge()` + * the returned state from the function call +* envd should provide default builds such as `envd_python(version="3.10", dev=True)` + +```python +def conda_env(root, version="3.10"): + conda = root.env("MAMBA_ROOT_PREFIX", "/opt/conda").run( + [ + "curl micro.mamba.pm/install.sh | sh -s -- -y", + "/opt/conda/bin/micromamba create -n envd python={}".format(version), + ] + ) + return conda + + +def install_shell(root, sh="bash"): + if sh == "bash": + return root + + +def install_vscode_ext(ext=()): + root = merge(http("openvsx/{}".format(x), filename=x) for x in ext) + return root + + +def install_key(): + root = ( + scratch() + .mkdir("/var/envd", permission=755) + .file("/var/envd/authorized_keys", data="xxx envd", permission=644) + ) + return root + + +def parallel_build(root): + """demo for parallel + diff + merge""" + # use `root.state()` to create a copy explicitly + conda = conda_env(install_shell(root.state())) + vscode = install_vscode_ext() + key = install_key() + # use `diff` when it's *not* 
built from scratch + return merge([root, key, vscode, diff(root, conda)]) + + +def build(): + root = ( + image("ubuntu:22.04") + .apt_packages( + ["curl", "wget"], update=True, clean=True, without_recommends=True + ) + .run(["curl https://starship.rs/install.sh | sh -s -- -y"], mount=None) + .copy( + image("tensorchord/envd-sshd-from-scratch:latest"), + host_path="/usr/bin/envd-sshd", + envd_path="/var/envd/bin/envd-sshd", + ) + ) + return parallel_build(root) +``` + +### Changes + +Before: + +```python +def build(): + base(os="ubuntu20.04", language="python") + install.python_packages( + [ + "via", + ] + ) + io.copy(host_path="./build.envd", envd_path="/") + runtime.command( + commands={ + "test": "ls /", + } + ) + runtime.environ(env={"ENVD_MODE": "DEV"}) +``` + +After: + +```python +def build(): + root = ( + image("ubuntu:20.04") + .setup_envd_python(version="3.10") + .install_python_packages(["via"]) + .copy(localhost(), host_path="./build.envd", envd_path="/") + .env({"ENVD": "DEV"}) + .runtime_cmd({"test": "ls /"}) + ) + return root +``` + +## Risk + +* Implementation in Starlark +* VSCode language server