From b2c7f857ebb8e9e5beec9e8eab882751c5c63740 Mon Sep 17 00:00:00 2001 From: "Ankur Sinha (Ankur Sinha Gmail)" Date: Tue, 21 May 2024 14:26:06 +0100 Subject: [PATCH 01/11] feat(biomodels): complete adapter --- .../adapters/biomodelsadapter.py | 82 ++++++++++++++++--- 1 file changed, 71 insertions(+), 11 deletions(-) diff --git a/applications/workspaces/server/workspaces/service/osbrepository/adapters/biomodelsadapter.py b/applications/workspaces/server/workspaces/service/osbrepository/adapters/biomodelsadapter.py index fbc016f7..d325dccd 100644 --- a/applications/workspaces/server/workspaces/service/osbrepository/adapters/biomodelsadapter.py +++ b/applications/workspaces/server/workspaces/service/osbrepository/adapters/biomodelsadapter.py @@ -11,6 +11,8 @@ from .utils import add_to_tree +class BiomodelsException(Exception): + pass class BioModelsAdapter: @@ -23,33 +25,90 @@ class BioModelsAdapter: def __init__(self, osbrepository, uri=None): self.osbrepository = osbrepository self.uri = uri if uri else osbrepository.uri - # even for different figshare "instances", the IDs remain the same, and - # there's only one API end point - self.api_url = ... - - + self.api_url = "https://www.ebi.ac.uk/biomodels/" + + try: + self.model_id = re.search( + f"{self.api_url}/(.*?)$", + self.uri.strip("/")).group(1) + + except AttributeError: + raise BiomodelsException(f"{uri} is not a valid Figshare URL") + + def get_json(self, uri): + logger.debug(f"Getting: {uri}") + try: + r = requests.get( + uri, + params={"format": "json"} + ) + if r.status_code == 200: + return r.json() + else: + raise BiomodelsException( + f"Unexpected requests status code: {r.status_code}") + except Exception as e: + raise BiomodelsException("Unexpected error:", sys.exc_info()[0]) + + def get_base_uri(self): + return self.uri def get_contexts(self): - ... 
+ result = self.get_json(self.uri) + revisions = result["history"]["revisions"] + return [str(v["version"]) for v in revisions] def get_resources(self, context): - ... - + logger.debug(f"Getting resources; {context}") + contents = self.get_json(f"{self.api_url}/model/files/{self.model_id}.{context}") + files = (contents["additional"] + contents["main"]) + + path = self.get_context_base_path(context) + + tree = RepositoryResourceNode( + resource=BioModelsRepositoryResource( + name="/", + path=path, + osbrepository_id=self.osbrepository.id, + ref=context, + ), + children=[], + ) + + for afile in files: + download_url = f"{self.api_url}/model/download/{afile['name']}" + add_to_tree( + tree=tree, + tree_path=[afile["name"]], + path=download_url, + size=afile["fileSize"], + osbrepository_id=self.osbrepository.id, + ) + + return tree def get_description(self, context): - ... + result = self.get_json(self.uri.strip() + f".{context}") + return result["description"] def get_tags(self, context): - ... + # using the format name for the moment, since they don't do explict + # tags/keywords + result = self.get_json(self.uri.strip() + f".{context}") + return result["format"]["name"] def create_copy_task(self, workspace_id, origins: List[ResourceOrigin]): tasks = [] import workspaces.service.workflow as workflow for origin in origins: path = origin.path - # no file tree in FigShare + # no file tree in Biomodels from the looks of it folder = self.osbrepository.name + # download everything: the handler will fetch the complete file list + # and download them all + if not path or path == "/": + path = self.model_id # username / password are optional and future usage, # e.g. 
for accessing non public repos @@ -62,3 +121,4 @@ def create_copy_task(self, workspace_id, origins: List[ResourceOrigin]): password="", )) return tasks + From 9b9fc9abaac1c99035754f4be4ad3f69b16d14ef Mon Sep 17 00:00:00 2001 From: "Ankur Sinha (Ankur Sinha Gmail)" Date: Tue, 21 May 2024 14:26:31 +0100 Subject: [PATCH 02/11] feat(biomodels): add task script --- .../workspaces/tasks/biomodels-copy/run.sh | 30 ++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/applications/workspaces/tasks/biomodels-copy/run.sh b/applications/workspaces/tasks/biomodels-copy/run.sh index f2fcd74d..4988ba2b 100644 --- a/applications/workspaces/tasks/biomodels-copy/run.sh +++ b/applications/workspaces/tasks/biomodels-copy/run.sh @@ -1,3 +1,31 @@ #!/bin/bash -# TODO biomodels-copy logic. Is it a bash or python script? \ No newline at end of file +set -e + +# remove the pvc from the path (if it has one) +# and append the folder +export download_path=`echo $shared_directory | cut -d ":" -f 2`/"${folder}" + +timestamp="$(date +"%Y%m%d%H%M%S-biomodels")" + +mkdir -p "${download_path}" +cd "${download_path}" + +# if a file url is passed +if echo "${url}" | grep -E "https://" 2>&1 > /dev/null +then + echo Biomodels copy "${url}" to "${download_path}" + echo "${url}" > filelist + aria2c --retry-wait=2 --max-tries=5 --input-file=filelist --max-concurrent-downloads=5 --max-connection-per-server=5 --allow-overwrite "true" --auto-file-renaming "false" + rm filelist -f +else + # if the model id is passed, downloads the OMEX archive and unzips it + echo Biomodels copy all files of article "${url}" to "${download_path}" + # use ..="true" and ..="false" here, otherwise aria2c gets confused + aria2c --retry-wait=2 --max-tries=5 --max-concurrent-downloads=5 --max-connection-per-server=5 --allow-overwrite="true" --auto-file-renaming="false" --out="$timestamp.omex" "https://www.ebi.ac.uk/biomodels/model/download/${url}" + unzip -o "$timestamp.omex" && rm -f "$timestamp.omex" 
+fi + + +# fix permissions +chown -R 1000:100 "${download_path}" From 20b6c9907e80c41a8e38dc46850826bcadbd089f Mon Sep 17 00:00:00 2001 From: "Ankur Sinha (Ankur Sinha Gmail)" Date: Tue, 21 May 2024 14:26:41 +0100 Subject: [PATCH 03/11] feat(biomodels): add readme for task script --- .../workspaces/tasks/biomodels-copy/README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 applications/workspaces/tasks/biomodels-copy/README.md diff --git a/applications/workspaces/tasks/biomodels-copy/README.md b/applications/workspaces/tasks/biomodels-copy/README.md new file mode 100644 index 00000000..feec14f0 --- /dev/null +++ b/applications/workspaces/tasks/biomodels-copy/README.md @@ -0,0 +1,16 @@ +# Biomodels copy task + + +How to test + +``` +shared_directory=/tmp folder=osbv2/develop url=BIOMD0000000998.9 ./run.sh +``` + +The above should download the OMEX archive of model BIOMD0000000998 (version 9) and unzip its files inside /tmp/osbv2/develop + + +``` +shared_directory=/tmp folder=osbv2/develop url=https://github.com/OpenSourceBrain/OSBv2 branch=develop paths= ./run.sh +``` +Because the url starts with https://, this should download that single URL as a file inside /tmp/osbv2/develop (the script does not use `branch` or `paths`) From 89ff54f6f9267ea187cd49c2dd966fc9c70225ee Mon Sep 17 00:00:00 2001 From: "Ankur Sinha (Ankur Sinha Gmail)" Date: Tue, 21 May 2024 16:05:02 +0100 Subject: [PATCH 04/11] feat(biomodels): install apk packages --- applications/workspaces/tasks/biomodels-copy/Dockerfile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/applications/workspaces/tasks/biomodels-copy/Dockerfile b/applications/workspaces/tasks/biomodels-copy/Dockerfile index c7288c8e..f59d0fb5 100644 --- a/applications/workspaces/tasks/biomodels-copy/Dockerfile +++ b/applications/workspaces/tasks/biomodels-copy/Dockerfile @@ -1,6 +1,10 @@ -FROM python:3.9 +ARG CLOUDHARNESS_BASE +FROM $CLOUDHARNESS_BASE + +# much faster than curl/wget +# https://pkgs.alpinelinux.org/packages?name=aria2&branch=edge +RUN apk add aria2 unzip -RUN pip install --no-cache-dir #TODO 
ADD . / From 2ff1abc2efe1bd9db38222f8ff81b9d92508d720 Mon Sep 17 00:00:00 2001 From: "Ankur Sinha (Ankur Sinha Gmail)" Date: Tue, 21 May 2024 16:38:42 +0100 Subject: [PATCH 05/11] fix(biomodels): `BioModelsRepo..` -> `BiomodelsRepo..` --- applications/workspaces/server/workspaces/models/__init__.py | 2 +- .../service/osbrepository/adapters/biomodelsadapter.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/applications/workspaces/server/workspaces/models/__init__.py b/applications/workspaces/server/workspaces/models/__init__.py index e20fd424..9a1bd7f5 100644 --- a/applications/workspaces/server/workspaces/models/__init__.py +++ b/applications/workspaces/server/workspaces/models/__init__.py @@ -45,4 +45,4 @@ from workspaces.models.workspace_resource_entity import WorkspaceResourceEntity from workspaces.models.workspace_resource_entity_all_of import WorkspaceResourceEntityAllOf from workspaces.models.repository_info import RepositoryInfo -from workspaces.models.biomodels_repository_resource import BioModelsRepositoryResource \ No newline at end of file +from workspaces.models.biomodels_repository_resource import BiomodelsRepositoryResource diff --git a/applications/workspaces/server/workspaces/service/osbrepository/adapters/biomodelsadapter.py b/applications/workspaces/server/workspaces/service/osbrepository/adapters/biomodelsadapter.py index d325dccd..70d4f364 100644 --- a/applications/workspaces/server/workspaces/service/osbrepository/adapters/biomodelsadapter.py +++ b/applications/workspaces/server/workspaces/service/osbrepository/adapters/biomodelsadapter.py @@ -6,7 +6,7 @@ from cloudharness import log as logger from workspaces.models import RepositoryResourceNode, RepositoryInfo from workspaces.models.resource_origin import ResourceOrigin -from workspaces.models.biomodels_repository_resource import BioModelsRepositoryResource +from workspaces.models.biomodels_repository_resource import BiomodelsRepositoryResource from .utils import 
add_to_tree @@ -66,7 +66,7 @@ def get_resources(self, context): path = self.get_context_base_path(context) tree = RepositoryResourceNode( - resource=BioModelsRepositoryResource( + resource=BiomodelsRepositoryResource( name="/", path=path, osbrepository_id=self.osbrepository.id, From fc64597010b19997af33eee3612d84619bddc973 Mon Sep 17 00:00:00 2001 From: "Ankur Sinha (Ankur Sinha Gmail)" Date: Wed, 22 May 2024 10:18:17 +0100 Subject: [PATCH 06/11] feat(biomodels): add adapter to services --- .../service/osbrepository/adapters/biomodelsadapter.py | 6 +++--- .../service/osbrepository/osbrepository_service.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/applications/workspaces/server/workspaces/service/osbrepository/adapters/biomodelsadapter.py b/applications/workspaces/server/workspaces/service/osbrepository/adapters/biomodelsadapter.py index 70d4f364..31e820ca 100644 --- a/applications/workspaces/server/workspaces/service/osbrepository/adapters/biomodelsadapter.py +++ b/applications/workspaces/server/workspaces/service/osbrepository/adapters/biomodelsadapter.py @@ -15,11 +15,11 @@ class BiomodelsException(Exception): pass -class BioModelsAdapter: +class BiomodelsAdapter: """ - Adapter for FigShare + Adapter for Biomodels - https://docs.figshare.com/ + https://www.ebi.ac.uk/biomodels/ """ def __init__(self, osbrepository, uri=None): diff --git a/applications/workspaces/server/workspaces/service/osbrepository/osbrepository_service.py b/applications/workspaces/server/workspaces/service/osbrepository/osbrepository_service.py index 57131e44..8906c508 100644 --- a/applications/workspaces/server/workspaces/service/osbrepository/osbrepository_service.py +++ b/applications/workspaces/server/workspaces/service/osbrepository/osbrepository_service.py @@ -5,7 +5,7 @@ -from workspaces.service.osbrepository.adapters import DandiAdapter, FigShareAdapter, GitHubAdapter +from workspaces.service.osbrepository.adapters import DandiAdapter, FigShareAdapter, 
GitHubAdapter, BiomodelsAdapter def get_repository_adapter(osbrepository: OSBRepository=None, repository_type=None, uri=None, *args, **kwargs): @@ -19,7 +19,7 @@ def get_repository_adapter(osbrepository: OSBRepository=None, repository_type=No elif repository_type == "figshare": return FigShareAdapter(*args, osbrepository=osbrepository, uri=uri, **kwargs) elif repository_type == "biomodels": - return BioModelsAdapter(*args, osbrepository=osbrepository, uri=uri, **kwargs) + return BiomodelsAdapter(*args, osbrepository=osbrepository, uri=uri, **kwargs) return None From f1e2c61315b016ce23aed74f1c3e61479c1f6592 Mon Sep 17 00:00:00 2001 From: "Ankur Sinha (Ankur Sinha Gmail)" Date: Wed, 22 May 2024 10:21:41 +0100 Subject: [PATCH 07/11] chore(adapters): `BioModels` -> `Biomodels` --- .../workspaces/service/osbrepository/adapters/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/applications/workspaces/server/workspaces/service/osbrepository/adapters/__init__.py b/applications/workspaces/server/workspaces/service/osbrepository/adapters/__init__.py index c4115ac8..332728c2 100644 --- a/applications/workspaces/server/workspaces/service/osbrepository/adapters/__init__.py +++ b/applications/workspaces/server/workspaces/service/osbrepository/adapters/__init__.py @@ -1,4 +1,4 @@ from workspaces.service.osbrepository.adapters.dandiadapter import DandiAdapter from workspaces.service.osbrepository.adapters.figshareadapter import FigShareAdapter from workspaces.service.osbrepository.adapters.githubadapter import GitHubAdapter -from workspaces.service.osbrepository.adapters.biomodelsadapter import BioModelsAdapter \ No newline at end of file +from workspaces.service.osbrepository.adapters.biomodelsadapter import BiomodelsAdapter From ad0d4dfcceb1f7380608a7adc86fb45eb5668fca Mon Sep 17 00:00:00 2001 From: "Ankur Sinha (Ankur Sinha Gmail)" Date: Wed, 22 May 2024 11:13:25 +0100 Subject: [PATCH 08/11] fix(biomodels): add `get_info` --- 
.../osbrepository/adapters/biomodelsadapter.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/applications/workspaces/server/workspaces/service/osbrepository/adapters/biomodelsadapter.py b/applications/workspaces/server/workspaces/service/osbrepository/adapters/biomodelsadapter.py index 31e820ca..a6f63c5a 100644 --- a/applications/workspaces/server/workspaces/service/osbrepository/adapters/biomodelsadapter.py +++ b/applications/workspaces/server/workspaces/service/osbrepository/adapters/biomodelsadapter.py @@ -25,15 +25,15 @@ class BiomodelsAdapter: def __init__(self, osbrepository, uri=None): self.osbrepository = osbrepository self.uri = uri if uri else osbrepository.uri - self.api_url = "https://www.ebi.ac.uk/biomodels/" + self.api_url = "https://www.ebi.ac.uk/biomodels" try: self.model_id = re.search( - f"{self.api_url}/(.*?)$", + f"{self.api_url}/(\\w+)", self.uri.strip("/")).group(1) except AttributeError: - raise BiomodelsException(f"{uri} is not a valid Figshare URL") + raise BiomodelsException(f"{uri} is not a valid Biomodels URL") def get_json(self, uri): logger.debug(f"Getting: {uri}") @@ -53,6 +53,11 @@ def get_json(self, uri): def get_base_uri(self): return self.uri + def get_info(self) -> RepositoryInfo: + info = self.get_json( + f"{self.api_url}/{self.model_id}") + return RepositoryInfo(name=info["name"], contexts=self.get_contexts(), tags=info["format"]["name"], summary=info.get("description", "")) + def get_contexts(self): result = self.get_json(self.uri) revisions = result["history"]["revisions"] From 72428d84e99a20dc43fa8fcfba48e91ad3b4e8c1 Mon Sep 17 00:00:00 2001 From: "Ankur Sinha (Ankur Sinha Gmail)" Date: Wed, 22 May 2024 12:25:43 +0100 Subject: [PATCH 09/11] feat(biomodels): update repository resources --- .../models/biomodels_repository_resource.py | 62 +++- .../workspaces/models/repository_resource.py | 8 +- .../service/osbrepository/__init__.py | 4 +- .../adapters/biomodelsadapter.py | 23 +- 
.../model/biomodels_repository_resource.py | 327 ++++++++++++++++++ .../model/repository_resource.py | 2 + 6 files changed, 409 insertions(+), 17 deletions(-) create mode 100644 libraries/client/workspaces/workspaces_cli/model/biomodels_repository_resource.py diff --git a/applications/workspaces/server/workspaces/models/biomodels_repository_resource.py b/applications/workspaces/server/workspaces/models/biomodels_repository_resource.py index d022e3d8..087d3313 100644 --- a/applications/workspaces/server/workspaces/models/biomodels_repository_resource.py +++ b/applications/workspaces/server/workspaces/models/biomodels_repository_resource.py @@ -17,7 +17,7 @@ class BiomodelsRepositoryResource(Model): Do not edit the class manually. """ - def __init__(self, name=None, path=None, osbrepository_id=None, size=None, timestamp_modified=None): # noqa: E501 + def __init__(self, name=None, path=None, osbrepository_id=None, size=None, timestamp_modified=None, ref=None, sha=None): # noqa: E501 """BiomodelsRepositoryResource - a model defined in OpenAPI :param name: The name of this BiomodelsRepositoryResource. # noqa: E501 @@ -30,13 +30,19 @@ def __init__(self, name=None, path=None, osbrepository_id=None, size=None, times :type size: int :param timestamp_modified: The timestamp_modified of this BiomodelsRepositoryResource. # noqa: E501 :type timestamp_modified: datetime + :param ref: The ref of this BiomodelsRepositoryResource. # noqa: E501 + :type ref: str + :param sha: The sha of this BiomodelsRepositoryResource. 
# noqa: E501 + :type sha: str """ self.openapi_types = { 'name': str, 'path': str, 'osbrepository_id': int, 'size': int, - 'timestamp_modified': datetime + 'timestamp_modified': datetime, + 'ref': str, + 'sha': str } self.attribute_map = { @@ -44,7 +50,9 @@ def __init__(self, name=None, path=None, osbrepository_id=None, size=None, times 'path': 'path', 'osbrepository_id': 'osbrepository_id', 'size': 'size', - 'timestamp_modified': 'timestamp_modified' + 'timestamp_modified': 'timestamp_modified', + 'ref': 'ref', + 'sha': 'sha' } self._name = name @@ -52,6 +60,8 @@ def __init__(self, name=None, path=None, osbrepository_id=None, size=None, times self._osbrepository_id = osbrepository_id self._size = size self._timestamp_modified = timestamp_modified + self._ref = ref + self._sha = sha @classmethod def from_dict(cls, dikt) -> 'BiomodelsRepositoryResource': @@ -178,3 +188,49 @@ def timestamp_modified(self, timestamp_modified): """ self._timestamp_modified = timestamp_modified + + @property + def ref(self): + """Gets the ref of this BiomodelsRepositoryResource. + + The GIT ref # noqa: E501 + + :return: The ref of this BiomodelsRepositoryResource. + :rtype: str + """ + return self._ref + + @ref.setter + def ref(self, ref): + """Sets the ref of this BiomodelsRepositoryResource. + + The GIT ref # noqa: E501 + + :param ref: The ref of this BiomodelsRepositoryResource. + :type ref: str + """ + + self._ref = ref + + @property + def sha(self): + """Gets the sha of this BiomodelsRepositoryResource. + + The GIT sha of the resource # noqa: E501 + + :return: The sha of this BiomodelsRepositoryResource. + :rtype: str + """ + return self._sha + + @sha.setter + def sha(self, sha): + """Sets the sha of this BiomodelsRepositoryResource. + + The GIT sha of the resource # noqa: E501 + + :param sha: The sha of this BiomodelsRepositoryResource. 
+ :type sha: str + """ + + self._sha = sha diff --git a/applications/workspaces/server/workspaces/models/repository_resource.py b/applications/workspaces/server/workspaces/models/repository_resource.py index fd3d4d7b..80b3c93f 100644 --- a/applications/workspaces/server/workspaces/models/repository_resource.py +++ b/applications/workspaces/server/workspaces/models/repository_resource.py @@ -6,14 +6,12 @@ from typing import List, Dict # noqa: F401 from workspaces.models.base_model_ import Model -from workspaces.models.dandi_repository_resource import DandiRepositoryResource -from workspaces.models.figshare_repository_resource import FigshareRepositoryResource -from workspaces.models.git_repository_resource import GITRepositoryResource -from workspaces import util - from workspaces.models.dandi_repository_resource import DandiRepositoryResource # noqa: E501 from workspaces.models.figshare_repository_resource import FigshareRepositoryResource # noqa: E501 from workspaces.models.git_repository_resource import GITRepositoryResource # noqa: E501 +from workspaces.models.biomodels_repository_resource import BiomodelsRepositoryResource # noqa: E501 +from workspaces import util + class RepositoryResource(Model): """NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). 
diff --git a/applications/workspaces/server/workspaces/service/osbrepository/__init__.py b/applications/workspaces/server/workspaces/service/osbrepository/__init__.py index bddeb4c6..57ee3095 100644 --- a/applications/workspaces/server/workspaces/service/osbrepository/__init__.py +++ b/applications/workspaces/server/workspaces/service/osbrepository/__init__.py @@ -7,7 +7,7 @@ -from workspaces.service.osbrepository.adapters import DandiAdapter, FigShareAdapter, GitHubAdapter +from workspaces.service.osbrepository.adapters import DandiAdapter, FigShareAdapter, GitHubAdapter, BiomodelsAdapter def get_repository_adapter(osbrepository: OSBRepository=None, repository_type=None, uri=None, *args, **kwargs): @@ -20,6 +20,8 @@ def get_repository_adapter(osbrepository: OSBRepository=None, repository_type=No return DandiAdapter(*args, osbrepository=osbrepository, uri=uri, **kwargs) elif repository_type == "figshare": return FigShareAdapter(*args, osbrepository=osbrepository, uri=uri, **kwargs) + elif repository_type == "biomodels": + return BiomodelsAdapter(*args, osbrepository=osbrepository, uri=uri, **kwargs) return None diff --git a/applications/workspaces/server/workspaces/service/osbrepository/adapters/biomodelsadapter.py b/applications/workspaces/server/workspaces/service/osbrepository/adapters/biomodelsadapter.py index a6f63c5a..a654fd03 100644 --- a/applications/workspaces/server/workspaces/service/osbrepository/adapters/biomodelsadapter.py +++ b/applications/workspaces/server/workspaces/service/osbrepository/adapters/biomodelsadapter.py @@ -59,16 +59,16 @@ def get_info(self) -> RepositoryInfo: return RepositoryInfo(name=info["name"], contexts=self.get_contexts(), tags=info["format"]["name"], summary=info.get("description", "")) def get_contexts(self): - result = self.get_json(self.uri) + result = self.get_json(f"{self.api_url}/{self.model_id}") revisions = result["history"]["revisions"] return [str(v["version"]) for v in revisions] def get_resources(self, context): - 
logger.debug(f"Getting resources; {context}") - contents = self.get_json(f"{self.api_url}/model/files/{self.model_id}.{context}") - files = (contents["additional"] + contents["main"]) + logger.debug(f"Getting resources: {context}") + path = f"{self.api_url}/model/files/{self.model_id}.{context}" + contents = self.get_json(path) + files = (contents.get("additional", []) + contents.get("main", [])) - path = self.get_context_base_path(context) tree = RepositoryResourceNode( resource=BiomodelsRepositoryResource( @@ -93,13 +93,20 @@ def get_resources(self, context): return tree def get_description(self, context): - result = self.get_json(self.uri.strip() + f".{context}") - return result["description"] + logger.debug(f"Getting description: {context}") + try: + result = self.get_json(f"{self.api_url}/{self.model_id}.{context}") + return result["description"] + except Exception as e: + logger.debug( + "unable to get the description from biomodels, %", str(e)) + return "" def get_tags(self, context): # using the format name for the moment, since they don't do explict # tags/keywords - result = self.get_json(self.uri.strip() + f".{context}") + logger.debug(f"Getting tags: {context}") + result = self.get_json(f"{self.api_url}/{self.model_id}.{context}") return result["format"]["name"] def create_copy_task(self, workspace_id, origins: List[ResourceOrigin]): diff --git a/libraries/client/workspaces/workspaces_cli/model/biomodels_repository_resource.py b/libraries/client/workspaces/workspaces_cli/model/biomodels_repository_resource.py new file mode 100644 index 00000000..9af53947 --- /dev/null +++ b/libraries/client/workspaces/workspaces_cli/model/biomodels_repository_resource.py @@ -0,0 +1,327 @@ +""" + Workspaces manager API + + Opensource Brain Platform - Reference Workspaces manager API # noqa: E501 + + The version of the OpenAPI document: 0.2.0 + Generated by: https://openapi-generator.tech +""" + + +import re # noqa: F401 +import sys # noqa: F401 + +from 
workspaces_cli.model_utils import ( # noqa: F401 + ApiTypeError, + ModelComposed, + ModelNormal, + ModelSimple, + cached_property, + change_keys_js_to_python, + convert_js_args_to_python_args, + date, + datetime, + file_type, + none_type, + validate_get_composed_info, +) +from ..model_utils import OpenApiModel +from workspaces_cli.exceptions import ApiAttributeError + + +def lazy_import(): + from workspaces_cli.model.repository_resource_base import RepositoryResourceBase + globals()['RepositoryResourceBase'] = RepositoryResourceBase + + +class BiomodelsRepositoryResource(ModelComposed): + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech + + Do not edit the class manually. + + Attributes: + allowed_values (dict): The key is the tuple path to the attribute + and the for var_name this is (var_name,). The value is a dict + with a capitalized key describing the allowed value and an allowed + value. These dicts store the allowed enum values. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + discriminator_value_class_map (dict): A dict to go from the discriminator + variable value to the discriminator class name. + validations (dict): The key is the tuple path to the attribute + and the for var_name this is (var_name,). The value is a dict + that stores validations for max_length, min_length, max_items, + min_items, exclusive_maximum, inclusive_maximum, exclusive_minimum, + inclusive_minimum, and regex. + additional_properties_type (tuple): A tuple of classes accepted + as additional properties values. 
+ """ + + allowed_values = { + } + + validations = { + } + + @cached_property + def additional_properties_type(): + """ + This must be a method because a model may have properties that are + of type self, this must run after the class is loaded + """ + lazy_import() + return (bool, date, datetime, dict, float, int, list, str, none_type,) # noqa: E501 + + _nullable = False + + @cached_property + def openapi_types(): + """ + This must be a method because a model may have properties that are + of type self, this must run after the class is loaded + + Returns + openapi_types (dict): The key is attribute name + and the value is attribute type. + """ + lazy_import() + return { + 'name': (str,), # noqa: E501 + 'path': (str,), # noqa: E501 + 'osbrepository_id': (int,), # noqa: E501 + 'size': (int,), # noqa: E501 + 'timestamp_modified': (datetime,), # noqa: E501 + } + + @cached_property + def discriminator(): + return None + + + attribute_map = { + 'name': 'name', # noqa: E501 + 'path': 'path', # noqa: E501 + 'osbrepository_id': 'osbrepository_id', # noqa: E501 + 'size': 'size', # noqa: E501 + 'timestamp_modified': 'timestamp_modified', # noqa: E501 + } + + read_only_vars = { + } + + @classmethod + @convert_js_args_to_python_args + def _from_openapi_data(cls, *args, **kwargs): # noqa: E501 + """BiomodelsRepositoryResource - a model defined in OpenAPI + + Keyword Args: + _check_type (bool): if True, values for parameters in openapi_types + will be type checked and a TypeError will be + raised if the wrong type is input. + Defaults to True + _path_to_item (tuple/list): This is a list of keys or values to + drill down to the model in received_data + when deserializing a response + _spec_property_naming (bool): True if the variable names in the input data + are serialized names, as specified in the OpenAPI document. + False if the variable names in the input data + are pythonic names, e.g. 
snake case (default) + _configuration (Configuration): the instance to use when + deserializing a file_type parameter. + If passed, type conversion is attempted + If omitted no type conversion is done. + _visited_composed_classes (tuple): This stores a tuple of + classes that we have traveled through so that + if we see that class again we will not use its + discriminator again. + When traveling through a discriminator, the + composed schema that is + is traveled through is added to this set. + For example if Animal has a discriminator + petType and we pass in "Dog", and the class Dog + allOf includes Animal, we move through Animal + once using the discriminator, and pick Dog. + Then in Dog, we will make an instance of the + Animal class but this time we won't travel + through its discriminator because we passed in + _visited_composed_classes = (Animal,) + name (str): file name. [optional] # noqa: E501 + path (str): Download URL of the Resource. [optional] # noqa: E501 + osbrepository_id (int): OSB Repository id. [optional] # noqa: E501 + size (int): File size in bytes of the RepositoryResource. [optional] # noqa: E501 + timestamp_modified (datetime): Date/time the ReposityResource is last modified. [optional] # noqa: E501 + """ + + _check_type = kwargs.pop('_check_type', True) + _spec_property_naming = kwargs.pop('_spec_property_naming', False) + _path_to_item = kwargs.pop('_path_to_item', ()) + _configuration = kwargs.pop('_configuration', None) + _visited_composed_classes = kwargs.pop('_visited_composed_classes', ()) + + self = super(OpenApiModel, cls).__new__(cls) + + if args: + raise ApiTypeError( + "Invalid positional arguments=%s passed to %s. Remove those invalid positional arguments." 
% ( + args, + self.__class__.__name__, + ), + path_to_item=_path_to_item, + valid_classes=(self.__class__,), + ) + + self._data_store = {} + self._check_type = _check_type + self._spec_property_naming = _spec_property_naming + self._path_to_item = _path_to_item + self._configuration = _configuration + self._visited_composed_classes = _visited_composed_classes + (self.__class__,) + + constant_args = { + '_check_type': _check_type, + '_path_to_item': _path_to_item, + '_spec_property_naming': _spec_property_naming, + '_configuration': _configuration, + '_visited_composed_classes': self._visited_composed_classes, + } + composed_info = validate_get_composed_info( + constant_args, kwargs, self) + self._composed_instances = composed_info[0] + self._var_name_to_model_instances = composed_info[1] + self._additional_properties_model_instances = composed_info[2] + discarded_args = composed_info[3] + + for var_name, var_value in kwargs.items(): + if var_name in discarded_args and \ + self._configuration is not None and \ + self._configuration.discard_unknown_keys and \ + self._additional_properties_model_instances: + # discard variable. + continue + setattr(self, var_name, var_value) + + return self + + required_properties = set([ + '_data_store', + '_check_type', + '_spec_property_naming', + '_path_to_item', + '_configuration', + '_visited_composed_classes', + '_composed_instances', + '_var_name_to_model_instances', + '_additional_properties_model_instances', + ]) + + @convert_js_args_to_python_args + def __init__(self, *args, **kwargs): # noqa: E501 + """BiomodelsRepositoryResource - a model defined in OpenAPI + + Keyword Args: + _check_type (bool): if True, values for parameters in openapi_types + will be type checked and a TypeError will be + raised if the wrong type is input. 
+ Defaults to True + _path_to_item (tuple/list): This is a list of keys or values to + drill down to the model in received_data + when deserializing a response + _spec_property_naming (bool): True if the variable names in the input data + are serialized names, as specified in the OpenAPI document. + False if the variable names in the input data + are pythonic names, e.g. snake case (default) + _configuration (Configuration): the instance to use when + deserializing a file_type parameter. + If passed, type conversion is attempted + If omitted no type conversion is done. + _visited_composed_classes (tuple): This stores a tuple of + classes that we have traveled through so that + if we see that class again we will not use its + discriminator again. + When traveling through a discriminator, the + composed schema that is + is traveled through is added to this set. + For example if Animal has a discriminator + petType and we pass in "Dog", and the class Dog + allOf includes Animal, we move through Animal + once using the discriminator, and pick Dog. + Then in Dog, we will make an instance of the + Animal class but this time we won't travel + through its discriminator because we passed in + _visited_composed_classes = (Animal,) + name (str): file name. [optional] # noqa: E501 + path (str): Download URL of the Resource. [optional] # noqa: E501 + osbrepository_id (int): OSB Repository id. [optional] # noqa: E501 + size (int): File size in bytes of the RepositoryResource. [optional] # noqa: E501 + timestamp_modified (datetime): Date/time the ReposityResource is last modified. 
[optional] # noqa: E501 + """ + + _check_type = kwargs.pop('_check_type', True) + _spec_property_naming = kwargs.pop('_spec_property_naming', False) + _path_to_item = kwargs.pop('_path_to_item', ()) + _configuration = kwargs.pop('_configuration', None) + _visited_composed_classes = kwargs.pop('_visited_composed_classes', ()) + + if args: + raise ApiTypeError( + "Invalid positional arguments=%s passed to %s. Remove those invalid positional arguments." % ( + args, + self.__class__.__name__, + ), + path_to_item=_path_to_item, + valid_classes=(self.__class__,), + ) + + self._data_store = {} + self._check_type = _check_type + self._spec_property_naming = _spec_property_naming + self._path_to_item = _path_to_item + self._configuration = _configuration + self._visited_composed_classes = _visited_composed_classes + (self.__class__,) + + constant_args = { + '_check_type': _check_type, + '_path_to_item': _path_to_item, + '_spec_property_naming': _spec_property_naming, + '_configuration': _configuration, + '_visited_composed_classes': self._visited_composed_classes, + } + composed_info = validate_get_composed_info( + constant_args, kwargs, self) + self._composed_instances = composed_info[0] + self._var_name_to_model_instances = composed_info[1] + self._additional_properties_model_instances = composed_info[2] + discarded_args = composed_info[3] + + for var_name, var_value in kwargs.items(): + if var_name in discarded_args and \ + self._configuration is not None and \ + self._configuration.discard_unknown_keys and \ + self._additional_properties_model_instances: + # discard variable. + continue + setattr(self, var_name, var_value) + if var_name in self.read_only_vars: + raise ApiAttributeError(f"`{var_name}` is a read-only attribute. 
Use `from_openapi_data` to instantiate " + f"class with read only attributes.") + + @cached_property + def _composed_schemas(): + # we need this here to make our import statements work + # we must store _composed_schemas in here so the code is only run + # when we invoke this method. If we kept this at the class + # level we would get an error beause the class level + # code would be run when this module is imported, and these composed + # classes don't exist yet because their module has not finished + # loading + lazy_import() + return { + 'anyOf': [ + ], + 'allOf': [ + RepositoryResourceBase, + ], + 'oneOf': [ + ], + } diff --git a/libraries/client/workspaces/workspaces_cli/model/repository_resource.py b/libraries/client/workspaces/workspaces_cli/model/repository_resource.py index 50084e7b..6d2b4239 100644 --- a/libraries/client/workspaces/workspaces_cli/model/repository_resource.py +++ b/libraries/client/workspaces/workspaces_cli/model/repository_resource.py @@ -33,9 +33,11 @@ def lazy_import(): from workspaces_cli.model.dandi_repository_resource import DandiRepositoryResource from workspaces_cli.model.figshare_repository_resource import FigshareRepositoryResource from workspaces_cli.model.git_repository_resource import GITRepositoryResource + from workspaces_cli.model.biomodels_repository_resource import BiomodelsRepositoryResource globals()['DandiRepositoryResource'] = DandiRepositoryResource globals()['FigshareRepositoryResource'] = FigshareRepositoryResource globals()['GITRepositoryResource'] = GITRepositoryResource + globals()['BiomodelsRepositoryResource'] = BiomodelsRepositoryResource class RepositoryResource(ModelComposed): From 8188d7255413f16018d7cf97961baf11026e2cbb Mon Sep 17 00:00:00 2001 From: "Ankur Sinha (Ankur Sinha Gmail)" Date: Wed, 22 May 2024 15:22:13 +0100 Subject: [PATCH 10/11] feat(biomodels): update file download URL --- .../osb-portal/src/components/repository/resources.ts | 1 + applications/osb-portal/src/pages/RepositoryPage.tsx | 7 
+++++++ .../service/osbrepository/adapters/biomodelsadapter.py | 9 ++++----- .../workspaces_cli/model/repository_resource.py | 1 + .../client/workspaces/workspaces_cli/models/__init__.py | 1 + 5 files changed, 14 insertions(+), 5 deletions(-) diff --git a/applications/osb-portal/src/components/repository/resources.ts b/applications/osb-portal/src/components/repository/resources.ts index 64e16ef3..5d63a09b 100644 --- a/applications/osb-portal/src/components/repository/resources.ts +++ b/applications/osb-portal/src/components/repository/resources.ts @@ -3,4 +3,5 @@ import { RepositoryType } from "../../apiclient/workspaces"; export default { [RepositoryType.Dandi]: "DANDI Archive", [RepositoryType.Github]: "GitHub", + [RepositoryType.Biomodels]: "Biomodels", } as any; diff --git a/applications/osb-portal/src/pages/RepositoryPage.tsx b/applications/osb-portal/src/pages/RepositoryPage.tsx index 4e4135f0..e540a16f 100644 --- a/applications/osb-portal/src/pages/RepositoryPage.tsx +++ b/applications/osb-portal/src/pages/RepositoryPage.tsx @@ -257,6 +257,13 @@ export const RepositoryPage = (props: any) => { case "figshare": window.open(`${repository.uri}`, "_blank"); break; + // Biomodels: repo.version + case "biomodels": + window.open( + `${repository.uri + "." 
+ repository.defaultContext}`, + "_blank" + ); + break; default: window.open(`#`, "_blank"); } diff --git a/applications/workspaces/server/workspaces/service/osbrepository/adapters/biomodelsadapter.py b/applications/workspaces/server/workspaces/service/osbrepository/adapters/biomodelsadapter.py index a654fd03..6d950fef 100644 --- a/applications/workspaces/server/workspaces/service/osbrepository/adapters/biomodelsadapter.py +++ b/applications/workspaces/server/workspaces/service/osbrepository/adapters/biomodelsadapter.py @@ -65,15 +65,14 @@ def get_contexts(self): def get_resources(self, context): logger.debug(f"Getting resources: {context}") - path = f"{self.api_url}/model/files/{self.model_id}.{context}" - contents = self.get_json(path) + contents = self.get_json(f"{self.api_url}/model/files/{self.model_id}.{context}") files = (contents.get("additional", []) + contents.get("main", [])) tree = RepositoryResourceNode( resource=BiomodelsRepositoryResource( name="/", - path=path, + path="/", osbrepository_id=self.osbrepository.id, ref=context, ), @@ -81,12 +80,12 @@ def get_resources(self, context): ) for afile in files: - download_url = f"{self.api_url}/model/download/{afile['name']}" + download_url = f"{self.api_url}/model/download/{self.model_id}.{context}?filename={afile['name']}" add_to_tree( tree=tree, tree_path=[afile["name"]], path=download_url, - size=afile["fileSize"], + size=int(afile["fileSize"]), osbrepository_id=self.osbrepository.id, ) diff --git a/libraries/client/workspaces/workspaces_cli/model/repository_resource.py b/libraries/client/workspaces/workspaces_cli/model/repository_resource.py index 6d2b4239..f96f0ca5 100644 --- a/libraries/client/workspaces/workspaces_cli/model/repository_resource.py +++ b/libraries/client/workspaces/workspaces_cli/model/repository_resource.py @@ -339,5 +339,6 @@ def _composed_schemas(): DandiRepositoryResource, FigshareRepositoryResource, GITRepositoryResource, + BiomodelsRepositoryResource, ], } diff --git 
a/libraries/client/workspaces/workspaces_cli/models/__init__.py b/libraries/client/workspaces/workspaces_cli/models/__init__.py index 0b3f45f2..f274a30a 100644 --- a/libraries/client/workspaces/workspaces_cli/models/__init__.py +++ b/libraries/client/workspaces/workspaces_cli/models/__init__.py @@ -13,6 +13,7 @@ from workspaces_cli.model.download_resource import DownloadResource from workspaces_cli.model.figshare_repository_resource import FigshareRepositoryResource from workspaces_cli.model.git_repository_resource import GITRepositoryResource +from workspaces_cli.model.biomodels_repository_resource import BiomodelsRepositoryResource from workspaces_cli.model.git_repository_resource_all_of import GITRepositoryResourceAllOf from workspaces_cli.model.inline_object import InlineObject from workspaces_cli.model.inline_response200 import InlineResponse200 From fc48f9ebc2e7cda3f758b0b648fe6f4644264fc9 Mon Sep 17 00:00:00 2001 From: "Ankur Sinha (Ankur Sinha Gmail)" Date: Wed, 22 May 2024 17:00:55 +0100 Subject: [PATCH 11/11] feat(biomodels): get complete file list to download ... ... instead of using the archive download The archive is generated on the fly, and it requires the user to check if the archive has been created, which we cannot do in an automated fashion. 
--- .../adapters/biomodelsadapter.py | 60 +++++++++++-------- .../workspaces/tasks/biomodels-copy/run.sh | 31 ++++++---- 2 files changed, 55 insertions(+), 36 deletions(-) diff --git a/applications/workspaces/server/workspaces/service/osbrepository/adapters/biomodelsadapter.py b/applications/workspaces/server/workspaces/service/osbrepository/adapters/biomodelsadapter.py index 6d950fef..7bcc1ff3 100644 --- a/applications/workspaces/server/workspaces/service/osbrepository/adapters/biomodelsadapter.py +++ b/applications/workspaces/server/workspaces/service/osbrepository/adapters/biomodelsadapter.py @@ -63,11 +63,15 @@ def get_contexts(self): revisions = result["history"]["revisions"] return [str(v["version"]) for v in revisions] - def get_resources(self, context): - logger.debug(f"Getting resources: {context}") + def _get_filelist(self, context): + logger.debug(f"Getting filelist: {context}") contents = self.get_json(f"{self.api_url}/model/files/{self.model_id}.{context}") files = (contents.get("additional", []) + contents.get("main", [])) + return files + def get_resources(self, context): + logger.debug(f"Getting resources: {context}") + files = self._get_filelist(context) tree = RepositoryResourceNode( resource=BiomodelsRepositoryResource( @@ -108,28 +112,36 @@ def get_tags(self, context): result = self.get_json(f"{self.api_url}/{self.model_id}.{context}") return result["format"]["name"] + # biomodels files are usually small, so one task is enough def create_copy_task(self, workspace_id, origins: List[ResourceOrigin]): - tasks = [] import workspaces.service.workflow as workflow - for origin in origins: - path = origin.path - # no file tree in Biomodels from the looks of it - folder = self.osbrepository.name - - # download everything: the handler will fetch the complete file list - # and download them all - if not path or path == "/": - path = self.model_id - - # username / password are optional and future usage, - # e.g. 
for accessing non public repos - tasks.append(workflow.create_copy_task( - image_name="workspaces-biomodels-copy", - workspace_id=workspace_id, - folder=folder, - url=path, - username="", - password="", - )) - return tasks + # no file tree in Biomodels from the looks of it + folder = self.osbrepository.name + + # if nothing is selected, origins has one entry with path "/" + # we get the file list and download individual files + # Biomodels does allow downloading the archive, but that is generated + # on the fly and can require us to wait for an unspecified amount of + # time + if len(origins) == 1 and origins[0].path == "/": + """ + # to use the archive method, just set paths to "" + paths = "" + """ + files = self._get_filelist(self.osbrepository.default_context) + download_url_prefix = f"{self.api_url}/model/download/{self.model_id}.{self.osbrepository.default_context}?filename=" + paths = "\\".join(f"{download_url_prefix}{file['name']}" for file in files) + else: + paths = "\\".join(o.path for o in origins) + + # username / password are not currently used + return workflow.create_copy_task( + image_name="workspaces-biomodels-copy", + workspace_id=workspace_id, + folder=folder, + url=f"{self.model_id}.{self.osbrepository.default_context}", + paths=paths, + username="", + password="", + ) diff --git a/applications/workspaces/tasks/biomodels-copy/run.sh b/applications/workspaces/tasks/biomodels-copy/run.sh index 4988ba2b..9f2de191 100644 --- a/applications/workspaces/tasks/biomodels-copy/run.sh +++ b/applications/workspaces/tasks/biomodels-copy/run.sh @@ -11,21 +11,28 @@ timestamp="$(date +"%Y%m%d%H%M%S-biomodels")" mkdir -p "${download_path}" cd "${download_path}" -# if a file url is passed -if echo "${url}" | grep -E "https://" 2>&1 > /dev/null -then - echo Biomodels copy "${url}" to "${download_path}" - echo "${url}" > filelist +# check if paths has a value, otherwise download the archive and unzip it +# note: we don't use the archive system because the archive
is generated on the +# fly and can make us wait for an unspecified amount of time, which tools can't +# work with +# -> left here for completeness +if [ -z "$paths" ]; then + echo Biomodels downloading archive of "${url}" to "${download_path}" + # use ..="true" and ..="false" here, otherwise aria2c gets confused + aria2c --retry-wait=2 --max-tries=5 --timeout=300 --max-concurrent-downloads=5 --max-connection-per-server=5 --allow-overwrite="true" --auto-file-renaming="false" --out="$timestamp.omex" "https://www.ebi.ac.uk/biomodels/model/download/${url}" + unzip -o "$timestamp.omex" && rm -vf "$timestamp.omex" +else + touch filelist + # Split paths by \ and add each path to the download list + IFS='\' + for path in $paths; do + echo Biomodels copy "${path}" to "${download_path}" + echo "${path}" >> filelist + done + echo Biomodels downloading files aria2c --retry-wait=2 --max-tries=5 --input-file=filelist --max-concurrent-downloads=5 --max-connection-per-server=5 --allow-overwrite "true" --auto-file-renaming "false" rm filelist -f -else - # if the model id is passed, downloads the OMEX archive and unzips it - echo Biomodels copy all files of article "${url}" to "${download_path}" - # use ..="true" and ..="false" here, otherwise aria2c gets confused - aria2c --retry-wait=2 --max-tries=5 --max-concurrent-downloads=5 --max-connection-per-server=5 --allow-overwrite="true" --auto-file-renaming="false" --out="$timestamp.omex" "https://www.ebi.ac.uk/biomodels/model/download/${url}" - unzip -o "$timestamp.omex" && rm -f "$timestamp.omex" fi - # fix permissions chown -R 1000:100 "${download_path}"