diff --git a/changelog.d/pr-205.md b/changelog.d/pr-205.md new file mode 100644 index 00000000..4c9037a6 --- /dev/null +++ b/changelog.d/pr-205.md @@ -0,0 +1,3 @@ +### 🚀 Enhancements and New Features + +- Add --assume-ready for image and extra-inputs. [PR #205](https://github.com/datalad/datalad-container/pull/205) (by [@bpinsard](https://github.com/bpinsard)) diff --git a/datalad_container/containers_run.py b/datalad_container/containers_run.py index 749e764f..cb3f3e66 100644 --- a/datalad_container/containers_run.py +++ b/datalad_container/containers_run.py @@ -26,6 +26,7 @@ from datalad.utils import ensure_iter from datalad_container.find_container import find_container_ +from datalad.support.constraints import EnsureChoice lgr = logging.getLogger("datalad.containers.containers_run") @@ -39,8 +40,17 @@ container_name=Parameter( args=('-n', '--container-name',), metavar="NAME", - doc="""Specify the name of or a path to a known container to use + doc="""Specify the name of or a path to a known container to use for execution, in case multiple containers are configured."""), + assume_ready=Parameter( + args=("--assume-ready",), + nargs="*", + constraints=EnsureChoice(None, "image", "inputs", "outputs", "extra-inputs"), + doc="""Assume that inputs do not need to be retrieved and/or outputs do not + need to be unlocked or removed, or containers/extra-inputs to be retrieved, + before running the command. 
This option allows + you to avoid the expense of these preparation steps if you know that they + are unnecessary."""), ) @@ -79,7 +89,7 @@ class ContainersRun(Interface): @eval_results def __call__(cmd, container_name=None, dataset=None, inputs=None, outputs=None, message=None, expand=None, - explicit=False, sidecar=None): + explicit=False, sidecar=None, assume_ready=None): from unittest.mock import \ patch # delayed, since takes long (~600ms for yoh) pwd, _ = get_command_pwds(dataset) @@ -154,28 +164,47 @@ def __call__(cmd, container_name=None, dataset=None, # just prepend and pray cmd = container['path'] + ' ' + cmd + assume_ready = assume_ready or [] extra_inputs = [] - for extra_input in ensure_iter(container.get("extra-input",[]), set): - try: - xi_kwargs = dict( - img_dspath=image_dspath, - img_dirpath=op.dirname(image_path) or ".", - ) - extra_inputs.append(extra_input.format(**xi_kwargs)) - except KeyError as exc: - yield get_status_dict( - 'run', - ds=ds, - status='error', - message=( - 'Unrecognized extra_input placeholder: %s. ' - 'See containers-add for information on known ones: %s', - exc, - ", ".join(xi_kwargs))) - return + if "extra-inputs" not in assume_ready: + for extra_input in ensure_iter(container.get("extra-input",[]), set): + try: + xi_kwargs = dict( + img_dspath=image_dspath, + img_dirpath=op.dirname(image_path) or ".", + ) + extra_inputs.append(extra_input.format(**xi_kwargs)) + except KeyError as exc: + yield get_status_dict( + 'run', + ds=ds, + status='error', + message=( + 'Unrecognized extra_input placeholder: %s. 
' + 'See containers-add for information on known ones: %s', + exc, + ", ".join(xi_kwargs))) + return + else: + # filter the whole list as .remove only removes first instance + assume_ready = [ar for ar in assume_ready if ar != 'extra-inputs'] + + if "image" not in assume_ready: + extra_inputs.append(image_path) + else: + assume_ready = [ar for ar in assume_ready if ar != 'image'] lgr.debug("extra_inputs = %r", extra_inputs) + if not assume_ready: + assume_ready = None + elif len(assume_ready) == 1: + assume_ready = assume_ready[0] + elif "inputs" in assume_ready and "outputs" in assume_ready: + assume_ready = "both" + else: + raise ValueError(f"Ended up with assume_ready={assume_ready!r} which must have not happened") + with patch.dict('os.environ', {CONTAINER_NAME_ENVVAR: container['name']}): # fire! @@ -183,10 +212,11 @@ def __call__(cmd, container_name=None, dataset=None, cmd=cmd, dataset=dataset or (ds if ds.path == pwd else None), inputs=inputs, - extra_inputs=[image_path] + extra_inputs, + extra_inputs=extra_inputs, outputs=outputs, message=message, expand=expand, explicit=explicit, - sidecar=sidecar): + sidecar=sidecar, + assume_ready=assume_ready): yield r diff --git a/datalad_container/tests/test_run.py b/datalad_container/tests/test_run.py index c3aa6e0c..879e91e1 100644 --- a/datalad_container/tests/test_run.py +++ b/datalad_container/tests/test_run.py @@ -276,6 +276,79 @@ def test_extra_inputs(path=None): ) == set(runinfo.get("extra_inputs", set())) +@with_tree( + tree={ + "container.img": "image file", + "input.txt": "input data", + "overlay1.img": "overlay1", + } +) +def test_assume_ready(path=None): + ds = Dataset(path).create(force=True, **common_kwargs) + ds.containers_add( + "mycontainer", + image="container.img", + call_fmt="echo image={img} cmd={cmd} img_dspath={img_dspath} img_dirpath={img_dirpath} > out.log", + extra_input=["overlay1.img"], + **common_kwargs + ) + ds.save(**common_kwargs) + # assume image is ready + ds.containers_run( + "XXX", 
+ container_name="mycontainer", + assume_ready=['image'], + **common_kwargs) + ok_file_has_content( + os.path.join(ds.repo.path, "out.log"), + "image=container.img", + re_=True, + ) + commit_msg = ds.repo.call_git(["show", "--format=%B"]) + cmd, runinfo = get_run_info(ds, commit_msg) + assert "container.img" not in runinfo.get("extra_inputs", []) + + # fails if an erroneous assume_ready value is given + with pytest.raises(ValueError): + ds.containers_run( + "XXX", + inputs=['input.txt'], + container_name="mycontainer", + assume_ready=['inputsssstypo', 'outputs'], + **common_kwargs) + + # fails when outputs are assumed ready but are not unlocked + with pytest.raises(IncompleteResultsError): + ds.containers_run( + "XXX", + inputs=['input.txt'], + outputs=['out.log'], + container_name="mycontainer", + assume_ready=['inputs', 'outputs'], + **common_kwargs) + + # assume inputs are ready; the value is passed on to regular `run` + ds.containers_run( + "YYY", + inputs=['input.txt'], + outputs=['out.log'], + container_name="mycontainer", + assume_ready=['inputs'], + **common_kwargs) + commit_msg = ds.repo.call_git(["show", "--format=%B"]) + cmd, runinfo = get_run_info(ds, commit_msg) + + ds.containers_run( + "ZZZ", + container_name="mycontainer", + outputs=['out.log'], + assume_ready=['extra-inputs'], + **common_kwargs) + commit_msg = ds.repo.call_git(["show", "--format=%B"]) + cmd, runinfo = get_run_info(ds, commit_msg) + assert 'overlay1.img' not in runinfo.get("extra_inputs", []) + + @skip_if_no_network @with_tree(tree={"subdir": {"in": "innards"}}) def test_run_no_explicit_dataset(path=None):