diff --git a/rocrate_validator/profiles/five-safes-crate/must/6_workflow_reference.ttl b/rocrate_validator/profiles/five-safes-crate/must/6_workflow_reference.ttl
new file mode 100644
index 00000000..ded22942
--- /dev/null
+++ b/rocrate_validator/profiles/five-safes-crate/must/6_workflow_reference.ttl
@@ -0,0 +1,71 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix purl: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+# Node shape applied to every instance of ro-crate:RootDataEntity. It carries two
+# property shapes on schema:mainEntity: cardinality/node kind, then the value's class.
+five-safes-crate:ReferenceToWorkflowCrate
+    a sh:NodeShape ;
+    sh:name "RootDataEntity" ;
+    sh:targetClass ro-crate:RootDataEntity ;
+
+    # RootDataEntity MUST have exactly one mainEntity property, and its value MUST be an IRI
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "mainEntity" ;
+        sh:path schema:mainEntity ;
+        sh:minCount 1 ;
+        sh:maxCount 1 ;
+        sh:nodeKind sh:IRI ;
+        sh:severity sh:Violation ;
+        sh:message "The RootDataEntity MUST have exactly one schema:mainEntity property that is an IRI." ;
+    ] ;
+
+    # The mainEntity of a RootDataEntity MUST be an instance of schema:Dataset (sh:class)
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "mainEntity" ;
+        sh:path schema:mainEntity ;
+        sh:class schema:Dataset ;
+        sh:severity sh:Violation ;
+        sh:message "The mainEntity pointed to by the RootDataEntity MUST be of type schema:Dataset" ;
+    ] .
+
+five-safes-crate:mainEntityHasProperConformsTo
+    a sh:NodeShape ;
+    sh:name "mainEntity" ;
+    sh:description "The mainEntity of the RootDataEntity MUST have a conformsTo property with an IRI starting with https://w3id.org/workflowhub/workflow-ro-crate" ;
+    sh:targetObjectsOf schema:mainEntity ;  # focus nodes: every object of a schema:mainEntity triple
+    sh:sparql [  # violation unless at least one conformsTo value has the required prefix; a missing conformsTo fails too
+        a sh:SPARQLConstraint ;
+        sh:name "conformsTo" ;
+        sh:message "conformsTo IRI must start with https://w3id.org/workflowhub/workflow-ro-crate" ;
+        sh:select """
+            SELECT $this WHERE {
+                FILTER NOT EXISTS {
+                    $this <http://purl.org/dc/terms/conformsTo> ?iri .
+                    FILTER(STRSTARTS(STR(?iri), "https://w3id.org/workflowhub/workflow-ro-crate"))
+                }
+            }
+        """ ;
+    ] .
diff --git a/rocrate_validator/profiles/five-safes-crate/should/6_workflow_reference.ttl b/rocrate_validator/profiles/five-safes-crate/should/6_workflow_reference.ttl
new file mode 100644
index 00000000..c7690ae0
--- /dev/null
+++ b/rocrate_validator/profiles/five-safes-crate/should/6_workflow_reference.ttl
@@ -0,0 +1,49 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix purl: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+# SHOULD-level check on every node referenced via schema:mainEntity (see sh:description below).
+five-safes-crate:DatasetMustHaveDistributionIfURI
+    a sh:NodeShape ;
+    sh:name "mainEntity" ;
+    sh:targetObjectsOf schema:mainEntity ;
+    sh:description "If mainEntity has an HTTP(S) @id, it SHOULD have a distribution that is an HTTP(S) URL." ;
+
+    sh:sparql [
+        a sh:SPARQLConstraint ;
+        sh:name "distribution" ;
+        sh:description "If mainEntity has an HTTP(S) @id, it SHOULD have a distribution that is an HTTP(S) URL." ;
+        sh:prefixes ro-crate:sparqlPrefixes ;  # supplies the schema: prefix used in the query (declared outside this file)
+        sh:select """
+            SELECT $this
+            WHERE {
+                FILTER (STRSTARTS(STR($this), "http://") || STRSTARTS(STR($this), "https://")) .
+                FILTER NOT EXISTS {
+                    $this schema:distribution ?dist .
+                    FILTER (STRSTARTS(STR(?dist), "http://") || STRSTARTS(STR(?dist), "https://")) .
+                }
+            }
+        """ ;
+        sh:severity sh:Warning ;  # NOTE(review): standard SHACL reads sh:severity from the shape, not the sh:sparql node; confirm the validator honours it here
+        sh:message "If mainEntity has an HTTP(S) @id SHOULD have at least one distribution with an HTTP(S) URL." ;
+    ] .
diff --git a/tests/data/crates/invalid/6_five_safes_crate/funding_project_no_name/ro-crate-metadata.json b/tests/data/crates/invalid/6_five_safes_crate/funding_project_no_name/ro-crate-metadata.json new file mode 100644 index 00000000..72a411b8 --- /dev/null +++ b/tests/data/crates/invalid/6_five_safes_crate/funding_project_no_name/ro-crate-metadata.json @@ -0,0 +1,400 @@ +{ + "@context": "https://w3id.org/ro/crate/1.1/context", + "@graph": [ + { + "@type": "CreativeWork", + "@id": "ro-crate-metadata.json", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.1" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "5-Safe RO-Crate Result", + "description": "example 5-Safe RO-Crate result metadata for testing", + "datePublished": "2025-09-20T14:45:00+00:00", + "conformsTo": { + "@id": "https://w3id.org/5s-crate/0.4" + }, + "hasPart": [ + { + "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + { + "@id": "input1.txt" + }, + { + "@id": "outputs/qa.csv" + }, + { + "@id": "outputs/diagrams/" + }, + { + "@id": 
"workflow/289/" + } + ], + "mainEntity": { + "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + "mentions": [ + { + "@id": "#query-37252371-c937-43bd-a0a7-3680b48c0538" + }, + { + "@id": "#check-f33fe90c-0c22-4c72-b299-de509028410e" + }, + { + "@id": "#validate-1146f640-819e-4c86-b029-b763a0040896" + }, + { + "@id": "#download-8b51bf57-6b29-44da-b24b-638c8df91639" + }, + { + "@id": "#signoff-3b741265-cfef-49ea-8138-a2fa149bf2f0" + }, + { + "@id": "#disclosure-b16c1f0a-ae7f-4582-9b28-7d9df3313e27" + }, + { + "@id": "#bagit-ce785c0b-c988-4043-8cbd-1489dcebc14f" + } + ], + "sourceOrganization": { + "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70" + }, + "publisher": { + "@id": "https://tre72.example.com/" + }, + "license": { + "@id": "http://spdx.org/licenses/CC-BY-4.0" + } + }, + { + "@id": "https://w3id.org/5s-crate/0.4", + "@type": "Profile", + "name": "Five Safes RO-Crate profile" + }, + { + "@id": "https://spdx.org/licenses/CC-BY-4.0", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "identifier": "CC-BY-4.0" + }, + { + "@id": "https://workflowhub.eu/workflows/289?version=1", + "@type": "Dataset", + "name": "CWL Protein MD Setup tutorial with mutations", + "conformsTo": { + "@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0" + }, + "distribution": { + "@id": "https://workflowhub.eu/workflows/289/ro_crate?version=1" + } + }, + { + "@id": "https://workflowhub.eu/workflows/289/ro_crate?version=1", + "@type": "DataDownload", + "conformsTo": { + "@id": "https://w3id.org/ro/crate" + }, + "encodingFormat": "application/zip" + }, + { + "@id": "#query-37252371-c937-43bd-a0a7-3680b48c0538", + "@type": "CreateAction", + "actionStatus": "http://schema.org/CompleteActionStatus", + "agent": { + "@id": "https://orcid.org/0000-0001-9842-9718" + }, + "instrument": { + "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + "name": "Execute query 12389 on workflow ", + "object": [ + { + "@id": "input1.txt" + 
}, + { + "@id": "#enableFastMode" + } + ], + "result": [ + { + "@id": "outputs/qa.csv" + }, + { + "@id": "outputs/diagrams/" + }, + { + "@id": "urn:uuid:07b81e0f-7ac4-5428-9940-878b241e2397" + } + ] + }, + { + "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70", + "@type": "Project", + "identifier": [ + { + "@id": "_:localid:tre72:project81" + } + ], + "funding": { + "@id": "https://gtr.ukri.org/projects?ref=10038961" + }, + "member": [ + { + "@id": "https://ror.org/027m9bs27" + }, + { + "@id": "https://ror.org/01ee9ar58" + } + ] + }, + { + "@id": "_:localid:tre72:project81", + "@type": "PropertyValue", + "name": "tre72", + "value": "project81" + }, + { + "@id": "input1.txt", + "@type": "File", + "name": "input1", + "exampleOfWork": { + "@id": "#sequence" + } + }, + { + "@id": "#enableFastMode", + "@type": "PropertyValue", + "name": "--fast-mode", + "value": "True", + "exampleOfWork": { + "@id": "#fast" + } + }, + { + "@id": "#sequence", + "@type": "FormalParameter", + "name": "input-sequence" + }, + { + "@id": "#fast", + "@type": "FormalParameter", + "name": "fast-mode" + }, + { + "@id": "outputs/qa.csv", + "@type": "File", + "encodingFormat": "text/csv", + "name": "Tabular listing of quality assessment" + }, + { + "@id": "outputs/diagrams/", + "@type": "Dataset", + "name": "Diagrams of regions of interest" + }, + { + "@id": "workflow/289/", + "sameAs": { + "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + "@type": "Dataset", + "name": "CWL Protein MD Setup tutorial with mutations", + "conformsTo": { + "@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0" + }, + "distribution": { + "@id": "https://workflowhub.eu/workflows/289/ro_crate?version=1" + } + }, + { + "@id": "#check-f33fe90c-0c22-4c72-b299-de509028410e", + "@type": "AssessAction", + "additionalType": { + "@id": "https://w3id.org/shp#CheckValue" + }, + "name": "BagIt checksum of Crate: OK", + "endTime": "2023-04-18T12:11:45+01:00", + "object": { + "@id": "./" + }, + "instrument": { + 
"@id": "https://www.iana.org/assignments/named-information#sha-512" + }, + "agent": { + "@id": "#validator-a4a66c63-2fe0-4c57-830d-268a40718313" + }, + "actionStatus": "http://schema.org/CompletedActionStatus" + }, + { + "@id": "#validate-1146f640-819e-4c86-b029-b763a0040896", + "@type": "AssessAction", + "additionalType": { + "@id": "https://w3id.org/shp#ValidationCheck" + }, + "name": "Validation against Five Safes RO-Crate profile: approved", + "startTime": "2023-04-18T12:11:46+01:00", + "endTime": "2023-04-18T12:11:49+01:00", + "object": { + "@id": "./" + }, + "instrument": { + "@id": "https://w3id.org/5s-crate/0.4" + }, + "agent": { + "@id": "#validator-a4a66c63-2fe0-4c57-830d-268a40718313" + }, + "actionStatus": "http://schema.org/CompletedActionStatus" + }, + { + "@id": "#download-8b51bf57-6b29-44da-b24b-638c8df91639", + "@type": "DownloadAction", + "name": "Downloaded workflow RO-Crate via proxy", + "startTime": "2023-04-18T12:11:50+01:00", + "endTime": "2023-04-18T12:11:52+01:00", + "object": { + "@id": "https://workflowhub.eu/workflows/289/ro_crate?version=1" + }, + "result": { + "@id": "workflow/289/" + }, + "agent": { + "@id": "http://proxy.example.com/" + }, + "actionStatus": "http://schema.org/CompletedActionStatus" + }, + { + "@id": "#signoff-3b741265-cfef-49ea-8138-a2fa149bf2f0", + "@type": "AssessAction", + "additionalType": { + "@id": "https://w3id.org/shp#SignOff" + }, + "name": "Sign-off of execution according to Agreement policy: approved", + "endTime": "2023-04-19T17:15:12+01:00", + "object": [ + { + "@id": "./" + }, + { + "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + { + "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70" + } + ], + "instrument": { + "@id": "https://tre72.example.com/agreement-policy/81" + }, + "agent": { + "@id": "https://orcid.org/0000-0002-1825-0097" + }, + "actionStatus": "http://schema.org/CompletedActionStatus" + }, + { + "@id": "#disclosure-b16c1f0a-ae7f-4582-9b28-7d9df3313e27", + "@type": 
"AssessAction", + "additionalType": { + "@id": "https://w3id.org/shp#DisclosureCheck" + }, + "name": "Disclosure check of workflow results: approved", + "endTime": "2023-04-25T16:00:00+01:00", + "object": { + "@id": "./" + }, + "agent": { + "@id": "https://orcid.org/0000-0002-1825-0097" + }, + "actionStatus": "http://schema.org/CompletedActionStatus" + }, + { + "@id": "#bagit-ce785c0b-c988-4043-8cbd-1489dcebc14f", + "@type": "UpdateAction", + "startTime": "2023-04-29T12:12:25+01:00", + "additionalType": { + "@id": "https://w3id.org/shp#GenerateCheckValue" + }, + "name": "BagIt manifests of Crate updated", + "object": { + "@id": "./" + }, + "instrument": { + "@id": "https://www.iana.org/assignments/named-information#sha-512" + }, + "agent": { + "@id": "#validator-a4a66c63-2fe0-4c57-830d-268a40718313" + }, + "actionStatus": "http://schema.org/CompletedActionStatus" + }, + { + "@id": "urn:uuid:07b81e0f-7ac4-5428-9940-878b241e2397", + "@type": "DigitalDocument", + "encodingFormat": "text/csv", + "name": "Patient measurement 07b81e0f-7ac4-5428-9940-878b241e2397", + "hasDigitalDocumentPermission": { + "@id": "#permissions-07b81e0f" + } + }, + { + "@id": "#permissions-07b81e0f", + "@type": "DigitalDocumentPermission", + "permissionType": "http://schema.org/ReadPermission", + "grantee": { + "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70" + } + }, + { + "@id": "https://orcid.org/0000-0001-9842-9718", + "@type": "Person", + "name": "Stian Soiland-Reyes", + "affiliation": { + "@id": "https://ror.org/027m9bs27" + }, + "memberOf": [ + { + "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70" + } + ] + }, + { + "@id": "https://ror.org/027m9bs27", + "@type": "Organization", + "name": "The University of Manchester" + }, + { + "@id": "https://gtr.ukri.org/projects?ref=10038961", + "@type": "Grant", + "name": "EOSC4Cancer" + }, + { + "@id": "https://tre72.example.com/", + "@type": "Organization", + "name": "TRE 72 trusted research environment at The University of 
Manchester", + "parentOrganization": { + "@id": "https://ror.org/027m9bs27" + } + }, + { + "@id": "https://tre72.example.com/#crate-validator", + "@type": "SoftwareApplication", + "name": "RO-Crate validator at TRE72", + "provider": { + "@id": "https://tre72.example.com/" + } + }, + { + "@id": "https://tre72.example.com/agreement-policy/81", + "@type": "CreativeWork", + "name": "Agreement policy for TRE72 for project 81" + }, + { + "@id": "https://www.iana.org/assignments/named-information#sha-512", + "@type": "DefinedTerm", + "name": "sha-512 algorithm" + } + ] +} diff --git a/tests/data/crates/invalid/6_five_safes_crate/root_data_entity_no_source_organization/ro-crate-metadata.json b/tests/data/crates/invalid/6_five_safes_crate/root_data_entity_no_source_organization/ro-crate-metadata.json new file mode 100644 index 00000000..10c51d19 --- /dev/null +++ b/tests/data/crates/invalid/6_five_safes_crate/root_data_entity_no_source_organization/ro-crate-metadata.json @@ -0,0 +1,398 @@ +{ + "@context": "https://w3id.org/ro/crate/1.1/context", + "@graph": [ + { + "@type": "CreativeWork", + "@id": "ro-crate-metadata.json", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.1" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "5-Safe RO-Crate Result", + "description": "example 5-Safe RO-Crate result metadata for testing", + "datePublished": "2025-09-20T14:45:00+00:00", + "conformsTo": { + "@id": "https://w3id.org/5s-crate/0.4" + }, + "hasPart": [ + { + "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + { + "@id": "input1.txt" + }, + { + "@id": "outputs/qa.csv" + }, + { + "@id": "outputs/diagrams/" + }, + { + "@id": "workflow/289/" + } + ], + "mainEntity": { + "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + "mentions": [ + { + "@id": "#query-37252371-c937-43bd-a0a7-3680b48c0538" + }, + { + "@id": "#check-f33fe90c-0c22-4c72-b299-de509028410e" + }, + { + "@id": 
"#validate-1146f640-819e-4c86-b029-b763a0040896" + }, + { + "@id": "#download-8b51bf57-6b29-44da-b24b-638c8df91639" + }, + { + "@id": "#signoff-3b741265-cfef-49ea-8138-a2fa149bf2f0" + }, + { + "@id": "#disclosure-b16c1f0a-ae7f-4582-9b28-7d9df3313e27" + }, + { + "@id": "#bagit-ce785c0b-c988-4043-8cbd-1489dcebc14f" + } + ], + "publisher": { + "@id": "https://tre72.example.com/" + }, + "license": { + "@id": "http://spdx.org/licenses/CC-BY-4.0" + } + }, + { + "@id": "https://w3id.org/5s-crate/0.4", + "@type": "Profile", + "name": "Five Safes RO-Crate profile" + }, + { + "@id": "https://spdx.org/licenses/CC-BY-4.0", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "identifier": "CC-BY-4.0" + }, + { + "@id": "https://workflowhub.eu/workflows/289?version=1", + "@type": "Dataset", + "name": "CWL Protein MD Setup tutorial with mutations", + "conformsTo": { + "@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0" + }, + "distribution": { + "@id": "https://workflowhub.eu/workflows/289/ro_crate?version=1" + } + }, + { + "@id": "https://workflowhub.eu/workflows/289/ro_crate?version=1", + "@type": "DataDownload", + "conformsTo": { + "@id": "https://w3id.org/ro/crate" + }, + "encodingFormat": "application/zip" + }, + { + "@id": "#query-37252371-c937-43bd-a0a7-3680b48c0538", + "@type": "CreateAction", + "actionStatus": "http://schema.org/CompleteActionStatus", + "agent": { + "@id": "https://orcid.org/0000-0001-9842-9718" + }, + "instrument": { + "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + "name": "Execute query 12389 on workflow ", + "object": [ + { + "@id": "input1.txt" + }, + { + "@id": "#enableFastMode" + } + ], + "result": [ + { + "@id": "outputs/qa.csv" + }, + { + "@id": "outputs/diagrams/" + }, + { + "@id": "urn:uuid:07b81e0f-7ac4-5428-9940-878b241e2397" + } + ] + }, + { + "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70", + "@type": "Project", + "name": "Investigation of cancer (TRE72 project 81)", + 
"identifier": [ + { + "@id": "_:localid:tre72:project81" + } + ], + "funding": { + "@id": "https://gtr.ukri.org/projects?ref=10038961" + }, + "member": [ + { + "@id": "https://ror.org/027m9bs27" + }, + { + "@id": "https://ror.org/01ee9ar58" + } + ] + }, + { + "@id": "_:localid:tre72:project81", + "@type": "PropertyValue", + "name": "tre72", + "value": "project81" + }, + { + "@id": "input1.txt", + "@type": "File", + "name": "input1", + "exampleOfWork": { + "@id": "#sequence" + } + }, + { + "@id": "#enableFastMode", + "@type": "PropertyValue", + "name": "--fast-mode", + "value": "True", + "exampleOfWork": { + "@id": "#fast" + } + }, + { + "@id": "#sequence", + "@type": "FormalParameter", + "name": "input-sequence" + }, + { + "@id": "#fast", + "@type": "FormalParameter", + "name": "fast-mode" + }, + { + "@id": "outputs/qa.csv", + "@type": "File", + "encodingFormat": "text/csv", + "name": "Tabular listing of quality assessment" + }, + { + "@id": "outputs/diagrams/", + "@type": "Dataset", + "name": "Diagrams of regions of interest" + }, + { + "@id": "workflow/289/", + "sameAs": { + "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + "@type": "Dataset", + "name": "CWL Protein MD Setup tutorial with mutations", + "conformsTo": { + "@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0" + }, + "distribution": { + "@id": "https://workflowhub.eu/workflows/289/ro_crate?version=1" + } + }, + { + "@id": "#check-f33fe90c-0c22-4c72-b299-de509028410e", + "@type": "AssessAction", + "additionalType": { + "@id": "https://w3id.org/shp#CheckValue" + }, + "name": "BagIt checksum of Crate: OK", + "endTime": "2023-04-18T12:11:45+01:00", + "object": { + "@id": "./" + }, + "instrument": { + "@id": "https://www.iana.org/assignments/named-information#sha-512" + }, + "agent": { + "@id": "#validator-a4a66c63-2fe0-4c57-830d-268a40718313" + }, + "actionStatus": "http://schema.org/CompletedActionStatus" + }, + { + "@id": "#validate-1146f640-819e-4c86-b029-b763a0040896", + "@type": 
"AssessAction", + "additionalType": { + "@id": "https://w3id.org/shp#ValidationCheck" + }, + "name": "Validation against Five Safes RO-Crate profile: approved", + "startTime": "2023-04-18T12:11:46+01:00", + "endTime": "2023-04-18T12:11:49+01:00", + "object": { + "@id": "./" + }, + "instrument": { + "@id": "https://w3id.org/5s-crate/0.4" + }, + "agent": { + "@id": "#validator-a4a66c63-2fe0-4c57-830d-268a40718313" + }, + "actionStatus": "http://schema.org/CompletedActionStatus" + }, + { + "@id": "#download-8b51bf57-6b29-44da-b24b-638c8df91639", + "@type": "DownloadAction", + "name": "Downloaded workflow RO-Crate via proxy", + "startTime": "2023-04-18T12:11:50+01:00", + "endTime": "2023-04-18T12:11:52+01:00", + "object": { + "@id": "https://workflowhub.eu/workflows/289/ro_crate?version=1" + }, + "result": { + "@id": "workflow/289/" + }, + "agent": { + "@id": "http://proxy.example.com/" + }, + "actionStatus": "http://schema.org/CompletedActionStatus" + }, + { + "@id": "#signoff-3b741265-cfef-49ea-8138-a2fa149bf2f0", + "@type": "AssessAction", + "additionalType": { + "@id": "https://w3id.org/shp#SignOff" + }, + "name": "Sign-off of execution according to Agreement policy: approved", + "endTime": "2023-04-19T17:15:12+01:00", + "object": [ + { + "@id": "./" + }, + { + "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + { + "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70" + } + ], + "instrument": { + "@id": "https://tre72.example.com/agreement-policy/81" + }, + "agent": { + "@id": "https://orcid.org/0000-0002-1825-0097" + }, + "actionStatus": "http://schema.org/CompletedActionStatus" + }, + { + "@id": "#disclosure-b16c1f0a-ae7f-4582-9b28-7d9df3313e27", + "@type": "AssessAction", + "additionalType": { + "@id": "https://w3id.org/shp#DisclosureCheck" + }, + "name": "Disclosure check of workflow results: approved", + "endTime": "2023-04-25T16:00:00+01:00", + "object": { + "@id": "./" + }, + "agent": { + "@id": "https://orcid.org/0000-0002-1825-0097" + }, + 
"actionStatus": "http://schema.org/CompletedActionStatus" + }, + { + "@id": "#bagit-ce785c0b-c988-4043-8cbd-1489dcebc14f", + "@type": "UpdateAction", + "startTime": "2023-04-29T12:12:25+01:00", + "additionalType": { + "@id": "https://w3id.org/shp#GenerateCheckValue" + }, + "name": "BagIt manifests of Crate updated", + "object": { + "@id": "./" + }, + "instrument": { + "@id": "https://www.iana.org/assignments/named-information#sha-512" + }, + "agent": { + "@id": "#validator-a4a66c63-2fe0-4c57-830d-268a40718313" + }, + "actionStatus": "http://schema.org/CompletedActionStatus" + }, + { + "@id": "urn:uuid:07b81e0f-7ac4-5428-9940-878b241e2397", + "@type": "DigitalDocument", + "encodingFormat": "text/csv", + "name": "Patient measurement 07b81e0f-7ac4-5428-9940-878b241e2397", + "hasDigitalDocumentPermission": { + "@id": "#permissions-07b81e0f" + } + }, + { + "@id": "#permissions-07b81e0f", + "@type": "DigitalDocumentPermission", + "permissionType": "http://schema.org/ReadPermission", + "grantee": { + "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70" + } + }, + { + "@id": "https://orcid.org/0000-0001-9842-9718", + "@type": "Person", + "name": "Stian Soiland-Reyes", + "affiliation": { + "@id": "https://ror.org/027m9bs27" + }, + "memberOf": [ + { + "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70" + } + ] + }, + { + "@id": "https://ror.org/027m9bs27", + "@type": "Organization", + "name": "The University of Manchester" + }, + { + "@id": "https://gtr.ukri.org/projects?ref=10038961", + "@type": "Grant", + "name": "EOSC4Cancer" + }, + { + "@id": "https://tre72.example.com/", + "@type": "Organization", + "name": "TRE 72 trusted research environment at The University of Manchester", + "parentOrganization": { + "@id": "https://ror.org/027m9bs27" + } + }, + { + "@id": "https://tre72.example.com/#crate-validator", + "@type": "SoftwareApplication", + "name": "RO-Crate validator at TRE72", + "provider": { + "@id": "https://tre72.example.com/" + } + }, + { + "@id": 
"https://tre72.example.com/agreement-policy/81", + "@type": "CreativeWork", + "name": "Agreement policy for TRE72 for project 81" + }, + { + "@id": "https://www.iana.org/assignments/named-information#sha-512", + "@type": "DefinedTerm", + "name": "sha-512 algorithm" + } + ] +} diff --git a/tests/data/crates/invalid/6_five_safes_crate/root_data_entity_source_organization_not_entity/ro-crate-metadata.json b/tests/data/crates/invalid/6_five_safes_crate/root_data_entity_source_organization_not_entity/ro-crate-metadata.json new file mode 100644 index 00000000..e181d903 --- /dev/null +++ b/tests/data/crates/invalid/6_five_safes_crate/root_data_entity_source_organization_not_entity/ro-crate-metadata.json @@ -0,0 +1,399 @@ +{ + "@context": "https://w3id.org/ro/crate/1.1/context", + "@graph": [ + { + "@type": "CreativeWork", + "@id": "ro-crate-metadata.json", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.1" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "5-Safe RO-Crate Result", + "description": "example 5-Safe RO-Crate result metadata for testing", + "datePublished": "2025-09-20T14:45:00+00:00", + "conformsTo": { + "@id": "https://w3id.org/5s-crate/0.4" + }, + "hasPart": [ + { + "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + { + "@id": "input1.txt" + }, + { + "@id": "outputs/qa.csv" + }, + { + "@id": "outputs/diagrams/" + }, + { + "@id": "workflow/289/" + } + ], + "mainEntity": { + "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + "mentions": [ + { + "@id": "#query-37252371-c937-43bd-a0a7-3680b48c0538" + }, + { + "@id": "#check-f33fe90c-0c22-4c72-b299-de509028410e" + }, + { + "@id": "#validate-1146f640-819e-4c86-b029-b763a0040896" + }, + { + "@id": "#download-8b51bf57-6b29-44da-b24b-638c8df91639" + }, + { + "@id": "#signoff-3b741265-cfef-49ea-8138-a2fa149bf2f0" + }, + { + "@id": "#disclosure-b16c1f0a-ae7f-4582-9b28-7d9df3313e27" + }, + { + "@id": "#bagit-ce785c0b-c988-4043-8cbd-1489dcebc14f" 
+ } + ], + "sourceOrganization": "Investigation of cancer (TRE72 project 81)", + "publisher": { + "@id": "https://tre72.example.com/" + }, + "license": { + "@id": "http://spdx.org/licenses/CC-BY-4.0" + } + }, + { + "@id": "https://w3id.org/5s-crate/0.4", + "@type": "Profile", + "name": "Five Safes RO-Crate profile" + }, + { + "@id": "https://spdx.org/licenses/CC-BY-4.0", + "@type": "CreativeWork", + "name": "Creative Commons Attribution 4.0 International", + "identifier": "CC-BY-4.0" + }, + { + "@id": "https://workflowhub.eu/workflows/289?version=1", + "@type": "Dataset", + "name": "CWL Protein MD Setup tutorial with mutations", + "conformsTo": { + "@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0" + }, + "distribution": { + "@id": "https://workflowhub.eu/workflows/289/ro_crate?version=1" + } + }, + { + "@id": "https://workflowhub.eu/workflows/289/ro_crate?version=1", + "@type": "DataDownload", + "conformsTo": { + "@id": "https://w3id.org/ro/crate" + }, + "encodingFormat": "application/zip" + }, + { + "@id": "#query-37252371-c937-43bd-a0a7-3680b48c0538", + "@type": "CreateAction", + "actionStatus": "http://schema.org/CompleteActionStatus", + "agent": { + "@id": "https://orcid.org/0000-0001-9842-9718" + }, + "instrument": { + "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + "name": "Execute query 12389 on workflow ", + "object": [ + { + "@id": "input1.txt" + }, + { + "@id": "#enableFastMode" + } + ], + "result": [ + { + "@id": "outputs/qa.csv" + }, + { + "@id": "outputs/diagrams/" + }, + { + "@id": "urn:uuid:07b81e0f-7ac4-5428-9940-878b241e2397" + } + ] + }, + { + "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70", + "@type": "Project", + "name": "Investigation of cancer (TRE72 project 81)", + "identifier": [ + { + "@id": "_:localid:tre72:project81" + } + ], + "funding": { + "@id": "https://gtr.ukri.org/projects?ref=10038961" + }, + "member": [ + { + "@id": "https://ror.org/027m9bs27" + }, + { + "@id": "https://ror.org/01ee9ar58" + } + ] 
+ }, + { + "@id": "_:localid:tre72:project81", + "@type": "PropertyValue", + "name": "tre72", + "value": "project81" + }, + { + "@id": "input1.txt", + "@type": "File", + "name": "input1", + "exampleOfWork": { + "@id": "#sequence" + } + }, + { + "@id": "#enableFastMode", + "@type": "PropertyValue", + "name": "--fast-mode", + "value": "True", + "exampleOfWork": { + "@id": "#fast" + } + }, + { + "@id": "#sequence", + "@type": "FormalParameter", + "name": "input-sequence" + }, + { + "@id": "#fast", + "@type": "FormalParameter", + "name": "fast-mode" + }, + { + "@id": "outputs/qa.csv", + "@type": "File", + "encodingFormat": "text/csv", + "name": "Tabular listing of quality assessment" + }, + { + "@id": "outputs/diagrams/", + "@type": "Dataset", + "name": "Diagrams of regions of interest" + }, + { + "@id": "workflow/289/", + "sameAs": { + "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + "@type": "Dataset", + "name": "CWL Protein MD Setup tutorial with mutations", + "conformsTo": { + "@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0" + }, + "distribution": { + "@id": "https://workflowhub.eu/workflows/289/ro_crate?version=1" + } + }, + { + "@id": "#check-f33fe90c-0c22-4c72-b299-de509028410e", + "@type": "AssessAction", + "additionalType": { + "@id": "https://w3id.org/shp#CheckValue" + }, + "name": "BagIt checksum of Crate: OK", + "endTime": "2023-04-18T12:11:45+01:00", + "object": { + "@id": "./" + }, + "instrument": { + "@id": "https://www.iana.org/assignments/named-information#sha-512" + }, + "agent": { + "@id": "#validator-a4a66c63-2fe0-4c57-830d-268a40718313" + }, + "actionStatus": "http://schema.org/CompletedActionStatus" + }, + { + "@id": "#validate-1146f640-819e-4c86-b029-b763a0040896", + "@type": "AssessAction", + "additionalType": { + "@id": "https://w3id.org/shp#ValidationCheck" + }, + "name": "Validation against Five Safes RO-Crate profile: approved", + "startTime": "2023-04-18T12:11:46+01:00", + "endTime": "2023-04-18T12:11:49+01:00", + 
"object": { + "@id": "./" + }, + "instrument": { + "@id": "https://w3id.org/5s-crate/0.4" + }, + "agent": { + "@id": "#validator-a4a66c63-2fe0-4c57-830d-268a40718313" + }, + "actionStatus": "http://schema.org/CompletedActionStatus" + }, + { + "@id": "#download-8b51bf57-6b29-44da-b24b-638c8df91639", + "@type": "DownloadAction", + "name": "Downloaded workflow RO-Crate via proxy", + "startTime": "2023-04-18T12:11:50+01:00", + "endTime": "2023-04-18T12:11:52+01:00", + "object": { + "@id": "https://workflowhub.eu/workflows/289/ro_crate?version=1" + }, + "result": { + "@id": "workflow/289/" + }, + "agent": { + "@id": "http://proxy.example.com/" + }, + "actionStatus": "http://schema.org/CompletedActionStatus" + }, + { + "@id": "#signoff-3b741265-cfef-49ea-8138-a2fa149bf2f0", + "@type": "AssessAction", + "additionalType": { + "@id": "https://w3id.org/shp#SignOff" + }, + "name": "Sign-off of execution according to Agreement policy: approved", + "endTime": "2023-04-19T17:15:12+01:00", + "object": [ + { + "@id": "./" + }, + { + "@id": "https://workflowhub.eu/workflows/289?version=1" + }, + { + "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70" + } + ], + "instrument": { + "@id": "https://tre72.example.com/agreement-policy/81" + }, + "agent": { + "@id": "https://orcid.org/0000-0002-1825-0097" + }, + "actionStatus": "http://schema.org/CompletedActionStatus" + }, + { + "@id": "#disclosure-b16c1f0a-ae7f-4582-9b28-7d9df3313e27", + "@type": "AssessAction", + "additionalType": { + "@id": "https://w3id.org/shp#DisclosureCheck" + }, + "name": "Disclosure check of workflow results: approved", + "endTime": "2023-04-25T16:00:00+01:00", + "object": { + "@id": "./" + }, + "agent": { + "@id": "https://orcid.org/0000-0002-1825-0097" + }, + "actionStatus": "http://schema.org/CompletedActionStatus" + }, + { + "@id": "#bagit-ce785c0b-c988-4043-8cbd-1489dcebc14f", + "@type": "UpdateAction", + "startTime": "2023-04-29T12:12:25+01:00", + "additionalType": { + "@id": 
"https://w3id.org/shp#GenerateCheckValue" + }, + "name": "BagIt manifests of Crate updated", + "object": { + "@id": "./" + }, + "instrument": { + "@id": "https://www.iana.org/assignments/named-information#sha-512" + }, + "agent": { + "@id": "#validator-a4a66c63-2fe0-4c57-830d-268a40718313" + }, + "actionStatus": "http://schema.org/CompletedActionStatus" + }, + { + "@id": "urn:uuid:07b81e0f-7ac4-5428-9940-878b241e2397", + "@type": "DigitalDocument", + "encodingFormat": "text/csv", + "name": "Patient measurement 07b81e0f-7ac4-5428-9940-878b241e2397", + "hasDigitalDocumentPermission": { + "@id": "#permissions-07b81e0f" + } + }, + { + "@id": "#permissions-07b81e0f", + "@type": "DigitalDocumentPermission", + "permissionType": "http://schema.org/ReadPermission", + "grantee": { + "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70" + } + }, + { + "@id": "https://orcid.org/0000-0001-9842-9718", + "@type": "Person", + "name": "Stian Soiland-Reyes", + "affiliation": { + "@id": "https://ror.org/027m9bs27" + }, + "memberOf": [ + { + "@id": "#project-be6ffb55-4f5a-4c14-b60e-47e0951090c70" + } + ] + }, + { + "@id": "https://ror.org/027m9bs27", + "@type": "Organization", + "name": "The University of Manchester" + }, + { + "@id": "https://gtr.ukri.org/projects?ref=10038961", + "@type": "Grant", + "name": "EOSC4Cancer" + }, + { + "@id": "https://tre72.example.com/", + "@type": "Organization", + "name": "TRE 72 trusted research environment at The University of Manchester", + "parentOrganization": { + "@id": "https://ror.org/027m9bs27" + } + }, + { + "@id": "https://tre72.example.com/#crate-validator", + "@type": "SoftwareApplication", + "name": "RO-Crate validator at TRE72", + "provider": { + "@id": "https://tre72.example.com/" + } + }, + { + "@id": "https://tre72.example.com/agreement-policy/81", + "@type": "CreativeWork", + "name": "Agreement policy for TRE72 for project 81" + }, + { + "@id": "https://www.iana.org/assignments/named-information#sha-512", + "@type": 
"DefinedTerm", + "name": "sha-512 algorithm" + } + ] +} diff --git a/tests/integration/profiles/five-safes-crate/test_5src_6_workflow_reference.py b/tests/integration/profiles/five-safes-crate/test_5src_6_workflow_reference.py new file mode 100644 index 00000000..d61803c7 --- /dev/null +++ b/tests/integration/profiles/five-safes-crate/test_5src_6_workflow_reference.py @@ -0,0 +1,161 @@ +# Copyright (c) 2024-2025 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from rocrate_validator.models import Severity +from tests.ro_crates import ValidROC +from tests.shared import do_entity_test, SPARQL_PREFIXES + +# set up logging +logger = logging.getLogger(__name__) + + +# ----- MUST fails tests + + +def test_5src_root_data_entity_no_main_entity(): + """ + Remove the RootDataEntity's mainEntity so minCount=1 is violated. + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + <./> schema:mainEntity ?m . + } + WHERE { + <./> schema:mainEntity ?m . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["RootDataEntity"], + expected_triggered_issues=[ + "The RootDataEntity MUST have exactly one schema:mainEntity property that is an IRI." 
+ ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_root_data_entity_main_entity_not_dataset_iri(): + """ + Test a Five Safes Crate where the RootDataEntity's mainEntity is an IRI but not typed as schema:Dataset. + (We point mainEntity to a new crate-local entity typed as something else.) + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + <./> schema:mainEntity ?m . + } + INSERT { + # add an IRI that is NOT typed as schema:Dataset (e.g. a schema:SoftwareSourceCode) + <./> schema:mainEntity <./not-a-dataset> . + <./not-a-dataset> a schema:SoftwareSourceCode . + } + WHERE { + <./> schema:mainEntity ?m . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["RootDataEntity"], + expected_triggered_issues=[ + "The mainEntity pointed to by the RootDataEntity MUST be of type schema:Dataset" + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +def test_5src_main_entity_conformsTo_invalid(): + """ + Test a Five Safes Crate where the mainEntity's purl:conformsTo IRI does NOT start with + "https://w3id.org/workflowhub/workflow-ro-crate" (violates the SHACL SPARQL constraint). + """ + sparql = ( + SPARQL_PREFIXES + + """ + PREFIX purl: + DELETE { + ?dataset purl:conformsTo ?iri . + } + INSERT { + ?dataset purl:conformsTo . + } + WHERE { + <./> schema:mainEntity ?dataset . + ?dataset purl:conformsTo ?iri . 
+ FILTER(STRSTARTS(STR(?iri), "https://w3id.org/workflowhub/workflow-ro-crate")) + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.REQUIRED, + expected_validation_result=False, + expected_triggered_requirements=["mainEntity"], + expected_triggered_issues=[ + "conformsTo IRI must start with https://w3id.org/workflowhub/workflow-ro-crate" + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + ) + + +# ----- SHOULD fails tests + + +def test_5src_main_entity_missing_distribution_warning(): + """ + Test a Five Safes Crate where a mainEntity has an HTTP(S) IRI but no distribution with an HTTP(S) URL. + This should trigger the SHACL warning about missing or non-HTTP(S) distributions. + """ + sparql = ( + SPARQL_PREFIXES + + """ + DELETE { + ?dataset schema:distribution ?dist . + } + WHERE { + <./> schema:mainEntity ?dataset . + ?dataset schema:distribution ?dist . + FILTER (STRSTARTS(STR(?dataset), "http://") || STRSTARTS(STR(?dataset), "https://")) . + FILTER (STRSTARTS(STR(?dist), "http://") || STRSTARTS(STR(?dist), "https://")) . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().five_safes_crate_request, + requirement_severity=Severity.RECOMMENDED, + expected_validation_result=False, + expected_triggered_requirements=["mainEntity"], + expected_triggered_issues=[ + "If mainEntity has an HTTP(S) @id SHOULD have at least one distribution with an HTTP(S) URL." + ], + profile_identifier="five-safes-crate", + rocrate_entity_mod_sparql=sparql, + )