From 5b1799f905983054fbb26a00dc4f7cc6b8eb2e40 Mon Sep 17 00:00:00 2001 From: basile Date: Tue, 14 Mar 2023 14:46:54 -0400 Subject: [PATCH 1/8] add assume ready for image (and extra-inputs) --- datalad_container/containers_run.py | 67 +++++++++++++++++++---------- 1 file changed, 45 insertions(+), 22 deletions(-) diff --git a/datalad_container/containers_run.py b/datalad_container/containers_run.py index 749e764..6feffa4 100644 --- a/datalad_container/containers_run.py +++ b/datalad_container/containers_run.py @@ -26,6 +26,7 @@ from datalad.utils import ensure_iter from datalad_container.find_container import find_container_ +from datalad.support.constraints import EnsureChoice lgr = logging.getLogger("datalad.containers.containers_run") @@ -39,11 +40,21 @@ container_name=Parameter( args=('-n', '--container-name',), metavar="NAME", - doc="""Specify the name of or a path to a known container to use + doc="""Specify the name of or a path to a known container to use for execution, in case multiple containers are configured."""), + assume_ready=Parameter( + args=("--assume-ready",), + nargs="*", + constraints=EnsureChoice(None, "image", "inputs", "outputs", "extra-inputs"), + doc="""Assume that inputs do not need to be retrieved and/or outputs do not + need to unlocked or removed, or containers/extra-inputs to be retrieved, + before running the command. This option allows + you to avoid the expense of these preparation steps if you know that they + are unnecessary."""), ) + @build_doc # all commands must be derived from Interface class ContainersRun(Interface): @@ -79,7 +90,7 @@ class ContainersRun(Interface): @eval_results def __call__(cmd, container_name=None, dataset=None, inputs=None, outputs=None, message=None, expand=None, - explicit=False, sidecar=None): + explicit=False, sidecar=None, assume_ready=None): from unittest.mock import \ patch # delayed, since takes long (~600ms for yoh) pwd, _ = get_command_pwds(dataset) @@ -154,28 +165,39 @@ def __call__(cmd, container_name=None, dataset=None, # just prepend and pray cmd = container['path'] + ' ' + cmd + assume_ready = assume_ready or [] extra_inputs = [] - for extra_input in ensure_iter(container.get("extra-input",[]), set): - try: - xi_kwargs = dict( - img_dspath=image_dspath, - img_dirpath=op.dirname(image_path) or ".", - ) - extra_inputs.append(extra_input.format(**xi_kwargs)) - except KeyError as exc: - yield get_status_dict( - 'run', - ds=ds, - status='error', - message=( - 'Unrecognized extra_input placeholder: %s. ' - 'See containers-add for information on known ones: %s', - exc, - ", ".join(xi_kwargs))) - return + if not "extra-inputs" in assume_ready: + for extra_input in ensure_iter(container.get("extra-input",[]), set): + try: + xi_kwargs = dict( + img_dspath=image_dspath, + img_dirpath=op.dirname(image_path) or ".", + ) + extra_inputs.append(extra_input.format(**xi_kwargs)) + except KeyError as exc: + yield get_status_dict( + 'run', + ds=ds, + status='error', + message=( + 'Unrecognized extra_input placeholder: %s. ' + 'See containers-add for information on known ones: %s', + exc, + ", ".join(xi_kwargs))) + return + assume_ready = [ar for ar in assume_ready if ar != 'extra-inputs'] + + if not "image" in assume_ready: + extra_inputs.append(image_path) + assume_ready = [ar for ar in assume_ready if ar != 'image'] lgr.debug("extra_inputs = %r", extra_inputs) + # we can assume "both" with params choices and filtering above + if len(assume_ready) > 1: + assume_ready='both' + with patch.dict('os.environ', {CONTAINER_NAME_ENVVAR: container['name']}): # fire! @@ -183,10 +205,11 @@ def __call__(cmd, container_name=None, dataset=None, cmd=cmd, dataset=dataset or (ds if ds.path == pwd else None), inputs=inputs, - extra_inputs=[image_path] + extra_inputs, + extra_inputs=extra_inputs, outputs=outputs, message=message, expand=expand, explicit=explicit, - sidecar=sidecar): + sidecar=sidecar, + assume_ready=assume_ready): yield r From 528ca453fa330da5aa1506d10d0b9fa924d0b7ee Mon Sep 17 00:00:00 2001 From: basile Date: Wed, 15 Mar 2023 11:49:35 -0400 Subject: [PATCH 2/8] explicit check, convert list to string/None in all cases --- datalad_container/containers_run.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/datalad_container/containers_run.py b/datalad_container/containers_run.py index 6feffa4..5efd0f0 100644 --- a/datalad_container/containers_run.py +++ b/datalad_container/containers_run.py @@ -194,9 +194,12 @@ def __call__(cmd, container_name=None, dataset=None, lgr.debug("extra_inputs = %r", extra_inputs) - # we can assume "both" with params choices and filtering above - if len(assume_ready) > 1: - assume_ready='both' + if len(assume_ready) == 0: + assume_ready = None + elif len(assume_ready) == 1: + assume_ready = assume_ready[0] + elif "inputs" in assume_ready and "outputs" in assume_ready: + assume_ready = "both" with patch.dict('os.environ', {CONTAINER_NAME_ENVVAR: container['name']}): From b638433a518849ca68ce5c8712a33da6e4a95cc9 Mon Sep 17 00:00:00 2001 From: Basile Date: Tue, 21 Mar 2023 14:14:53 -0400 Subject: [PATCH 3/8] Update datalad_container/containers_run.py Co-authored-by: Yaroslav Halchenko --- datalad_container/containers_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datalad_container/containers_run.py b/datalad_container/containers_run.py index 5efd0f0..0222183 100644 --- a/datalad_container/containers_run.py +++ b/datalad_container/containers_run.py @@ -167,7 +167,7 @@ def __call__(cmd, container_name=None, dataset=None, assume_ready = assume_ready or [] extra_inputs = [] - if not "extra-inputs" in assume_ready: + if "extra-inputs" not in assume_ready: for extra_input in ensure_iter(container.get("extra-input",[]), set): try: xi_kwargs = dict( From 06dcb0d9ac5d0bc73de30180ebe3a409708683c7 Mon Sep 17 00:00:00 2001 From: basile Date: Tue, 21 Mar 2023 14:18:40 -0400 Subject: [PATCH 4/8] fixes following YH review --- datalad_container/containers_run.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/datalad_container/containers_run.py b/datalad_container/containers_run.py index 0222183..4d139d9 100644 --- a/datalad_container/containers_run.py +++ b/datalad_container/containers_run.py @@ -186,11 +186,14 @@ def __call__(cmd, container_name=None, dataset=None, exc, ", ".join(xi_kwargs))) return - assume_ready = [ar for ar in assume_ready if ar != 'extra-inputs'] + else: + # filter the whole list as .remove only removes first instance + assume_ready = [ar for ar in assume_ready if ar != 'extra-inputs'] - if not "image" in assume_ready: + if "image" not in assume_ready: extra_inputs.append(image_path) - assume_ready = [ar for ar in assume_ready if ar != 'image'] + else: + assume_ready = [ar for ar in assume_ready if ar != 'image'] lgr.debug("extra_inputs = %r", extra_inputs) From a7686a5bad9e250d3810bde424d79bba16edc5c5 Mon Sep 17 00:00:00 2001 From: Basile Date: Tue, 21 Mar 2023 14:18:09 -0400 Subject: [PATCH 5/8] Update datalad_container/containers_run.py Co-authored-by: Yaroslav Halchenko --- datalad_container/containers_run.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/datalad_container/containers_run.py b/datalad_container/containers_run.py index 4d139d9..fb6887e 100644 --- a/datalad_container/containers_run.py +++ b/datalad_container/containers_run.py @@ -197,12 +197,14 @@ def __call__(cmd, container_name=None, dataset=None, lgr.debug("extra_inputs = %r", extra_inputs) - if len(assume_ready) == 0: + if not assume_ready: assume_ready = None elif len(assume_ready) == 1: assume_ready = assume_ready[0] elif "inputs" in assume_ready and "outputs" in assume_ready: assume_ready = "both" + else: + raise ValueError(f"Ended up with assume_ready={assume_ready!r} which must have not happened") with patch.dict('os.environ', {CONTAINER_NAME_ENVVAR: container['name']}): From 93267c975271c0b6e9d32497f65adce8a8fe6617 Mon Sep 17 00:00:00 2001 From: bpinsard Date: Tue, 19 Mar 2024 10:58:34 -0400 Subject: [PATCH 6/8] add tests for assume_ready --- datalad_container/tests/test_run.py | 73 +++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/datalad_container/tests/test_run.py b/datalad_container/tests/test_run.py index c3aa6e0..879e91e 100644 --- a/datalad_container/tests/test_run.py +++ b/datalad_container/tests/test_run.py @@ -276,6 +276,79 @@ def test_extra_inputs(path=None): ) == set(runinfo.get("extra_inputs", set())) +@with_tree( + tree={ + "container.img": "image file", + "input.txt": "input data", + "overlay1.img": "overlay1", + } +) +def test_assume_ready(path=None): + ds = Dataset(path).create(force=True, **common_kwargs) + ds.containers_add( + "mycontainer", + image="container.img", + call_fmt="echo image={img} cmd={cmd} img_dspath={img_dspath} img_dirpath={img_dirpath} > out.log", + extra_input=["overlay1.img"], + **common_kwargs + ) + ds.save(**common_kwargs) + # assume image is ready + ds.containers_run( + "XXX", + container_name="mycontainer", + assume_ready=['image'], + **common_kwargs) + ok_file_has_content( + os.path.join(ds.repo.path, "out.log"), + "image=container.img", + re_=True, + ) + commit_msg = ds.repo.call_git(["show", "--format=%B"]) + cmd, runinfo = get_run_info(ds, commit_msg) + assert "container.img" not in runinfo.get("extra_inputs", []) + + # fails if erroneous assume_ready value + with pytest.raises(ValueError): + ds.containers_run( + "XXX", + inputs=['input.txt'], + container_name="mycontainer", + assume_ready=['inputsssstypo', 'outputs'], + **common_kwargs) + + # fail when output is assume ready but is not unlocked + with pytest.raises(IncompleteResultsError): + ds.containers_run( + "XXX", + inputs=['input.txt'], + outputs=['out.log'], + container_name="mycontainer", + assume_ready=['inputs', 'outputs'], + **common_kwargs) + + # assume inputs as ready, pass to regular `run` + ds.containers_run( + "YYY", + inputs=['input.txt'], + outputs=['out.log'], + container_name="mycontainer", + assume_ready=['inputs'], + **common_kwargs) + commit_msg = ds.repo.call_git(["show", "--format=%B"]) + cmd, runinfo = get_run_info(ds, commit_msg) + + ds.containers_run( + "ZZZ", + container_name="mycontainer", + outputs=['out.log'], + assume_ready=['extra-inputs'], + **common_kwargs) + commit_msg = ds.repo.call_git(["show", "--format=%B"]) + cmd, runinfo = get_run_info(ds, commit_msg) + assert 'overlay1.img' not in runinfo.get("extra_inputs", []) + + @skip_if_no_network @with_tree(tree={"subdir": {"in": "innards"}}) def test_run_no_explicit_dataset(path=None): From 7308b164c1f18b15679680d6c3f095a61741ed85 Mon Sep 17 00:00:00 2001 From: DataLad Bot Date: Wed, 20 Mar 2024 12:55:51 +0000 Subject: [PATCH 7/8] [release-action] Autogenerate changelog snippet for PR 205 --- changelog.d/pr-205.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 changelog.d/pr-205.md diff --git a/changelog.d/pr-205.md b/changelog.d/pr-205.md new file mode 100644 index 0000000..4c9037a --- /dev/null +++ b/changelog.d/pr-205.md @@ -0,0 +1,3 @@ +### 🚀 Enhancements and New Features + +- Add --assume-ready for image and extra-inputs. [PR #205](https://github.com/datalad/datalad-container/pull/205) (by [@bpinsard](https://github.com/bpinsard)) From 380ead911f4e14a4e1b7037e39d7e2c1f0df7f18 Mon Sep 17 00:00:00 2001 From: Basile Date: Wed, 20 Mar 2024 09:14:03 -0400 Subject: [PATCH 8/8] Update datalad_container/containers_run.py Co-authored-by: Yaroslav Halchenko --- datalad_container/containers_run.py | 1 - 1 file changed, 1 deletion(-) diff --git a/datalad_container/containers_run.py b/datalad_container/containers_run.py index fb6887e..cb3f3e6 100644 --- a/datalad_container/containers_run.py +++ b/datalad_container/containers_run.py @@ -54,7 +54,6 @@ ) - @build_doc # all commands must be derived from Interface class ContainersRun(Interface):