From 97d7fda5b0d48725e4a8bdcf63e4199e4b138c83 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 22 Sep 2023 15:05:50 +0200 Subject: [PATCH 01/62] Initial push of the ISA JSON parser app --- .gitignore | 2 +- read-isa-json/isa_objects/isa_study.py | 13 + read-isa-json/read_isa_json.py | 15 + read-isa-json/read_isa_json/__init__.py | 0 .../test_local_instance.json | 4427 +++++++++++++++++ 5 files changed, 4456 insertions(+), 1 deletion(-) create mode 100644 read-isa-json/isa_objects/isa_study.py create mode 100644 read-isa-json/read_isa_json.py create mode 100644 read-isa-json/read_isa_json/__init__.py create mode 100644 read-isa-json/test_isa_json_files/test_local_instance.json diff --git a/.gitignore b/.gitignore index 7e6c35e..4e6ddc8 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,4 @@ .secret.yml build/ ena_upload_cli.egg-info/ -ena_upload/__pycache__/ +__pycache__/ diff --git a/read-isa-json/isa_objects/isa_study.py b/read-isa-json/isa_objects/isa_study.py new file mode 100644 index 0000000..c3a0c56 --- /dev/null +++ b/read-isa-json/isa_objects/isa_study.py @@ -0,0 +1,13 @@ +class IsaStudy: + def __init__( + self, alias, title, study_type, study_abstract, new_study_type=None + ) -> None: + self.alias = alias + self.title = title + self.study_type = study_type + self.new_study_type = new_study_type + self.study_abstract = study_abstract + + @classmethod + def from_isa_json(isa_json): + pass \ No newline at end of file diff --git a/read-isa-json/read_isa_json.py b/read-isa-json/read_isa_json.py new file mode 100644 index 0000000..cbb81b4 --- /dev/null +++ b/read-isa-json/read_isa_json.py @@ -0,0 +1,15 @@ +import json +from isa_objects import isa_study + +from rich import print_json + +# Read json file +isa_json_file = open('test_isa_json_files/test_local_instance.json') +isa_json = json.load(isa_json_file) +# print_json(data = isa_json) + +# Extracting the Study information +studies = isa_json['studies'] +print_json(data=studies) + +# 
study_information = isa_study.IsaStudy.from_isa_json(isa_json) \ No newline at end of file diff --git a/read-isa-json/read_isa_json/__init__.py b/read-isa-json/read_isa_json/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/read-isa-json/test_isa_json_files/test_local_instance.json b/read-isa-json/test_isa_json_files/test_local_instance.json new file mode 100644 index 0000000..d2e1b46 --- /dev/null +++ b/read-isa-json/test_isa_json_files/test_local_instance.json @@ -0,0 +1,4427 @@ +{ + "identifier": "", + "title": "ENA upload investigation", + "description": "", + "submissionDate": "", + "publicReleaseDate": "", + "ontologySourceReferences": [], + "filename": "ENA upload investigation.txt", + "comments": [ + { "name": "ISAjson export time", "value": "2023-09-22T07:31:31Z" }, + { "name": "SEEK Project name", "value": "ENA Upload Project" }, + { + "name": "SEEK Project ID", + "value": "http://localhost:3000/single_pages/3" + }, + { "name": "SEEK Investigation ID", "value": "2" } + ], + "publications": [], + "people": [ + { + "@id": "#people/1", + "lastName": "De Pelseneer", + "firstName": "Kevin", + "midInitials": "", + "email": "jklhndvkjdsbvs@mail.com", + "phone": "", + "fax": "", + "address": "", + "affiliation": "", + "roles": [ + { "termAccession": "", "termSource": "", "annotationValue": "" } + ], + "comments": [{ "@id": "", "value": "", "name": "" }] + } + ], + "studies": [ + { + "identifier": "", + "title": "ENA Upload Study", + "description": "", + "submissionDate": "", + "publicReleaseDate": "", + "filename": "ENA Upload Study.txt", + "comments": [ + { "name": "SEEK Study ID", "value": "2" }, + { "name": "SEEK creation date", "value": "2023-09-22T06:14:34Z" } + ], + "publications": [ + { + "id": 1, + "pubmed_id": 666, + "title": "The number of the beast", + "abstract": "", + "published_date": "2023-09-22", + "journal": "", + "first_letter": "T", + "contributor_id": 1, + "created_at": "2023-09-22T06:10:30.000Z", + "updated_at": 
"2023-09-22T06:10:30.000Z", + "doi": null, + "uuid": "a800ee30-3b3c-013c-36ae-2c91a17e7bb9", + "policy_id": 43, + "citation": "", + "deleted_contributor": null, + "registered_mode": 3, + "booktitle": "", + "publisher": "", + "editor": "yo mama", + "publication_type_id": 1, + "url": "", + "version": 1, + "license": null, + "other_creators": null + } + ], + "people": [ + { + "@id": "#people/1", + "lastName": "De Pelseneer", + "firstName": "Kevin", + "midInitials": "", + "email": "jklhndvkjdsbvs@mail.com", + "phone": "", + "fax": "", + "address": "", + "affiliation": "", + "roles": [ + { "termAccession": "", "termSource": "", "annotationValue": "" } + ], + "comments": [{ "@id": "", "value": "", "name": "" }] + } + ], + "studyDesignDescriptors": [], + "characteristicCategories": [ + { + "@id": "#characteristic_category/title_62", + "characteristicType": { + "annotationValue": "title", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/description_63", + "characteristicType": { + "annotationValue": "description", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/taxon_id_64", + "characteristicType": { + "annotationValue": "taxon_id", + "termAccession": "", + "termSource": "" + } + } + ], + "materials": { + "sources": [ + { + "@id": "#source/13", + "name": "Source 1", + "characteristics": [ + { + "category": { "@id": "#characteristic_category/title_62" }, + "value": { + "annotationValue": "Source awsome dragon 1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/description_63" + }, + "value": { + "annotationValue": "This is my first source. 
Be gentle!", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#characteristic_category/taxon_id_64" }, + "value": { + "annotationValue": "103695", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#source/14", + "name": "Source 2", + "characteristics": [ + { + "category": { "@id": "#characteristic_category/title_62" }, + "value": { + "annotationValue": "Source awsome dragon 2", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/description_63" + }, + "value": { + "annotationValue": "This is the second source. This is my control group!", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#characteristic_category/taxon_id_64" }, + "value": { + "annotationValue": "103695", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#source/15", + "name": "Source 3", + "characteristics": [ + { + "category": { "@id": "#characteristic_category/title_62" }, + "value": { + "annotationValue": "Source awsome dragon 3", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/description_63" + }, + "value": { + "annotationValue": "This is the third source. Meh! 
I have two others...", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#characteristic_category/taxon_id_64" }, + "value": { + "annotationValue": "103695", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + } + ], + "samples": [ + { + "@id": "#sample/16", + "name": "Dragon heart source 1 - A", + "derivesFrom": [{ "@id": "#source/13" }], + "characteristics": [], + "factorValues": [ + { + "category": { "@id": "" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/17", + "name": "Dragon heart source 2 -A", + "derivesFrom": [{ "@id": "#source/14" }], + "characteristics": [], + "factorValues": [ + { + "category": { "@id": "" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/18", + "name": "Dragon heart source 3 - A", + "derivesFrom": [{ "@id": "#source/15" }], + "characteristics": [], + "factorValues": [ + { + "category": { "@id": "" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/19", + "name": "Dragon brain source 1 - A", + "derivesFrom": [{ "@id": "#source/13" }], + "characteristics": [], + "factorValues": [ + { + "category": { "@id": "" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/20", + "name": "Dragon brain source 2 - A", + "derivesFrom": [{ "@id": "#source/14" }], + "characteristics": 
[], + "factorValues": [ + { + "category": { "@id": "" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/21", + "name": "Dragon brain source 3 - A", + "derivesFrom": [{ "@id": "#source/15" }], + "characteristics": [], + "factorValues": [ + { + "category": { "@id": "" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/22", + "name": "Dragon heart source 1 - C", + "derivesFrom": [{ "@id": "#source/13" }], + "characteristics": [], + "factorValues": [ + { + "category": { "@id": "" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/23", + "name": "Dragon heart source 1 - B", + "derivesFrom": [{ "@id": "#source/13" }], + "characteristics": [], + "factorValues": [ + { + "category": { "@id": "" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/24", + "name": "Dragon heart source 2 - B", + "derivesFrom": [{ "@id": "#source/14" }], + "characteristics": [], + "factorValues": [ + { + "category": { "@id": "" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/25", + "name": "Dragon heart source 2 - C", + "derivesFrom": [{ "@id": "#source/14" }], + "characteristics": [], + "factorValues": [ + { + "category": { "@id": "" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + 
"termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/26", + "name": "Dragon heart source 3 - B", + "derivesFrom": [{ "@id": "#source/15" }], + "characteristics": [], + "factorValues": [ + { + "category": { "@id": "" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/27", + "name": "Dragon heart source 3 - C", + "derivesFrom": [{ "@id": "#source/15" }], + "characteristics": [], + "factorValues": [ + { + "category": { "@id": "" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/28", + "name": "Dragon brain source 1 - B", + "derivesFrom": [{ "@id": "#source/13" }], + "characteristics": [], + "factorValues": [ + { + "category": { "@id": "" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/29", + "name": "Dragon brain source 1 - C", + "derivesFrom": [{ "@id": "#source/13" }], + "characteristics": [], + "factorValues": [ + { + "category": { "@id": "" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/30", + "name": "Dragon brain source 2 - B", + "derivesFrom": [{ "@id": "#source/14" }], + "characteristics": [], + "factorValues": [ + { + "category": { "@id": "" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/31", + "name": "Dragon brain source 2 - C", + "derivesFrom": [{ "@id": "#source/14" }], + "characteristics": [], 
+ "factorValues": [ + { + "category": { "@id": "" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/32", + "name": "Dragon brain source 3 - B", + "derivesFrom": [{ "@id": "#source/15" }], + "characteristics": [], + "factorValues": [ + { + "category": { "@id": "" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/33", + "name": "Dragon brain source 3 - C", + "derivesFrom": [{ "@id": "#source/15" }], + "characteristics": [], + "factorValues": [ + { + "category": { "@id": "" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + } + ] + }, + "protocols": [ + { + "@id": "#protocol/_2", + "name": "sample collection", + "protocolType": { + "annotationValue": "sample collection", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + }, + { + "@id": "#protocol/_9", + "name": "library construction", + "protocolType": { + "annotationValue": "library construction", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [ + { + "@id": "#parameter/70", + "parameterName": { + "annotationValue": "library_construction_protocol", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/72", + "parameterName": { + "annotationValue": "design_description", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/73", + "parameterName": { + "annotationValue": 
"library_source", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/74", + "parameterName": { + "annotationValue": "library_strategy", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/75", + "parameterName": { + "annotationValue": "library_selection", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/76", + "parameterName": { + "annotationValue": "library_layout", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/77", + "parameterName": { + "annotationValue": "insert_size", + "termAccession": "", + "termSource": "" + } + } + ], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + }, + { + "@id": "#protocol/_10", + "name": "nucleic acid sequencing", + "protocolType": { + "annotationValue": "nucleic acid sequencing", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [ + { + "@id": "#parameter/84", + "parameterName": { + "annotationValue": "platform", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/85", + "parameterName": { + "annotationValue": "instrument_model", + "termAccession": "", + "termSource": "" + } + } + ], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + } + ], + "processSequence": [ + { + "@id": "#process/sample_collection/16", + "name": "", + "executesProtocol": { "@id": "#protocol/_2" }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [{ "@id": "#source/13" }], + "outputs": [{ "@id": "#sample/16" }] + }, + { + "@id": "#process/sample_collection/17", + "name": "", + "executesProtocol": { "@id": "#protocol/_2" }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + 
"nextProcess": {}, + "inputs": [{ "@id": "#source/14" }], + "outputs": [{ "@id": "#sample/17" }] + }, + { + "@id": "#process/sample_collection/18", + "name": "", + "executesProtocol": { "@id": "#protocol/_2" }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [{ "@id": "#source/15" }], + "outputs": [{ "@id": "#sample/18" }] + }, + { + "@id": "#process/sample_collection/19", + "name": "", + "executesProtocol": { "@id": "#protocol/_2" }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [{ "@id": "#source/13" }], + "outputs": [{ "@id": "#sample/19" }] + }, + { + "@id": "#process/sample_collection/20", + "name": "", + "executesProtocol": { "@id": "#protocol/_2" }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [{ "@id": "#source/14" }], + "outputs": [{ "@id": "#sample/20" }] + }, + { + "@id": "#process/sample_collection/21", + "name": "", + "executesProtocol": { "@id": "#protocol/_2" }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [{ "@id": "#source/15" }], + "outputs": [{ "@id": "#sample/21" }] + }, + { + "@id": "#process/sample_collection/22", + "name": "", + "executesProtocol": { "@id": "#protocol/_2" }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [{ "@id": "#source/13" }], + "outputs": [{ "@id": "#sample/22" }] + }, + { + "@id": "#process/sample_collection/23", + "name": "", + "executesProtocol": { "@id": "#protocol/_2" }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [{ "@id": "#source/13" }], + "outputs": [{ "@id": "#sample/23" }] + }, + { + "@id": "#process/sample_collection/24", + "name": "", + "executesProtocol": { "@id": "#protocol/_2" }, + 
"parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [{ "@id": "#source/14" }], + "outputs": [{ "@id": "#sample/24" }] + }, + { + "@id": "#process/sample_collection/25", + "name": "", + "executesProtocol": { "@id": "#protocol/_2" }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [{ "@id": "#source/14" }], + "outputs": [{ "@id": "#sample/25" }] + }, + { + "@id": "#process/sample_collection/26", + "name": "", + "executesProtocol": { "@id": "#protocol/_2" }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [{ "@id": "#source/15" }], + "outputs": [{ "@id": "#sample/26" }] + }, + { + "@id": "#process/sample_collection/27", + "name": "", + "executesProtocol": { "@id": "#protocol/_2" }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [{ "@id": "#source/15" }], + "outputs": [{ "@id": "#sample/27" }] + }, + { + "@id": "#process/sample_collection/28", + "name": "", + "executesProtocol": { "@id": "#protocol/_2" }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [{ "@id": "#source/13" }], + "outputs": [{ "@id": "#sample/28" }] + }, + { + "@id": "#process/sample_collection/29", + "name": "", + "executesProtocol": { "@id": "#protocol/_2" }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [{ "@id": "#source/13" }], + "outputs": [{ "@id": "#sample/29" }] + }, + { + "@id": "#process/sample_collection/30", + "name": "", + "executesProtocol": { "@id": "#protocol/_2" }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [{ "@id": "#source/14" }], + "outputs": [{ "@id": "#sample/30" }] + }, + { + "@id": 
"#process/sample_collection/31", + "name": "", + "executesProtocol": { "@id": "#protocol/_2" }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [{ "@id": "#source/14" }], + "outputs": [{ "@id": "#sample/31" }] + }, + { + "@id": "#process/sample_collection/32", + "name": "", + "executesProtocol": { "@id": "#protocol/_2" }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [{ "@id": "#source/15" }], + "outputs": [{ "@id": "#sample/32" }] + }, + { + "@id": "#process/sample_collection/33", + "name": "", + "executesProtocol": { "@id": "#protocol/_2" }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [{ "@id": "#source/15" }], + "outputs": [{ "@id": "#sample/33" }] + } + ], + "assays": [ + { + "@id": "#assay/9_10", + "filename": "a_assays.txt", + "measurementType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "technologyType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "technologyPlatform": "", + "characteristicCategories": [ + { + "@id": "#characteristic_category/title_71", + "characteristicType": { + "annotationValue": "title", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/submission_date_79", + "characteristicType": { + "annotationValue": "submission date", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/status_80", + "characteristicType": { + "annotationValue": "status", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/accession_81", + "characteristicType": { + "annotationValue": "accession", + "termAccession": "", + "termSource": "" + } + } + ], + "materials": { + "samples": [], + "otherMaterials": [ + { + "@id": "#other_material/34", + "name": "Lib Heart 1 - A", + "type": 
"library_name", + "characteristics": [ + { + "category": { "@id": "#characteristic_category/title_71" }, + "value": { + "annotationValue": "Lib Title Heart 1 - A", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_79" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#characteristic_category/status_80" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_81" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [{ "@id": "#sample/16" }] + }, + { + "@id": "#other_material/35", + "name": "Lib Heart 1 - B", + "type": "library_name", + "characteristics": [ + { + "category": { "@id": "#characteristic_category/title_71" }, + "value": { + "annotationValue": "Lib Title Heart 1 - B", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_79" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#characteristic_category/status_80" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_81" + }, + "value": { + 
"annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [{ "@id": "#sample/23" }] + }, + { + "@id": "#other_material/36", + "name": "Lib Heart 1 - C", + "type": "library_name", + "characteristics": [ + { + "category": { "@id": "#characteristic_category/title_71" }, + "value": { + "annotationValue": "Lib Title Heart 1 - C", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_79" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#characteristic_category/status_80" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_81" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [{ "@id": "#sample/22" }] + }, + { + "@id": "#other_material/37", + "name": "Lib Heart 2 - A", + "type": "library_name", + "characteristics": [ + { + "category": { "@id": "#characteristic_category/title_71" }, + "value": { + "annotationValue": "Lib Title Heart 2 - A", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_79" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": 
"#characteristic_category/status_80" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_81" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [{ "@id": "#sample/17" }] + }, + { + "@id": "#other_material/38", + "name": "Lib Heart 2 - B", + "type": "library_name", + "characteristics": [ + { + "category": { "@id": "#characteristic_category/title_71" }, + "value": { + "annotationValue": "Lib Title Heart 2 - B", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_79" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#characteristic_category/status_80" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_81" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [{ "@id": "#sample/24" }] + }, + { + "@id": "#other_material/39", + "name": "Lib Heart 2 - C", + "type": "library_name", + "characteristics": [ + { + "category": { "@id": "#characteristic_category/title_71" }, + "value": { + "annotationValue": "Lib Title Heart 2 - C", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + 
}, + { + "category": { + "@id": "#characteristic_category/submission_date_79" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#characteristic_category/status_80" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_81" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [{ "@id": "#sample/25" }] + }, + { + "@id": "#other_material/40", + "name": "Lib Heart 3 - A", + "type": "library_name", + "characteristics": [ + { + "category": { "@id": "#characteristic_category/title_71" }, + "value": { + "annotationValue": "Lib Title Heart 3 - A", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_79" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#characteristic_category/status_80" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_81" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [{ "@id": "#sample/18" }] + }, + { + "@id": "#other_material/41", + "name": "Lib Heart 3 - B", + "type": 
"library_name", + "characteristics": [ + { + "category": { "@id": "#characteristic_category/title_71" }, + "value": { + "annotationValue": "Lib Title Heart 3 - B", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_79" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#characteristic_category/status_80" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_81" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [{ "@id": "#sample/26" }] + }, + { + "@id": "#other_material/42", + "name": "Lib Heart 3 - C", + "type": "library_name", + "characteristics": [ + { + "category": { "@id": "#characteristic_category/title_71" }, + "value": { + "annotationValue": "Lib Title Heart 3 - C", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_79" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#characteristic_category/status_80" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_81" + }, + "value": { + 
"annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [{ "@id": "#sample/27" }] + }, + { + "@id": "#other_material/43", + "name": "Lib Brain 1 - A", + "type": "library_name", + "characteristics": [ + { + "category": { "@id": "#characteristic_category/title_71" }, + "value": { + "annotationValue": "Lib Title Brain 1 - A", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_79" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#characteristic_category/status_80" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_81" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [{ "@id": "#sample/19" }] + }, + { + "@id": "#other_material/44", + "name": "Lib Brain 1 - B", + "type": "library_name", + "characteristics": [ + { + "category": { "@id": "#characteristic_category/title_71" }, + "value": { + "annotationValue": "Lib Title Brain 1 - B", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_79" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": 
"#characteristic_category/status_80" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_81" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [{ "@id": "#sample/28" }] + }, + { + "@id": "#other_material/45", + "name": "Lib Brain 1 - C", + "type": "library_name", + "characteristics": [ + { + "category": { "@id": "#characteristic_category/title_71" }, + "value": { + "annotationValue": "Lib Title Brain 1 - C", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_79" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#characteristic_category/status_80" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_81" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [{ "@id": "#sample/29" }] + }, + { + "@id": "#other_material/46", + "name": "Lib Brain 2 - A", + "type": "library_name", + "characteristics": [ + { + "category": { "@id": "#characteristic_category/title_71" }, + "value": { + "annotationValue": "Lib Title Brain 2 - A", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + 
}, + { + "category": { + "@id": "#characteristic_category/submission_date_79" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#characteristic_category/status_80" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_81" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [{ "@id": "#sample/20" }] + }, + { + "@id": "#other_material/47", + "name": "Lib Brain 2 - B", + "type": "library_name", + "characteristics": [ + { + "category": { "@id": "#characteristic_category/title_71" }, + "value": { + "annotationValue": "Lib Title Brain 2 - B", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_79" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#characteristic_category/status_80" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_81" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [{ "@id": "#sample/30" }] + }, + { + "@id": "#other_material/48", + "name": "Lib Brain 2 - C", + "type": 
"library_name", + "characteristics": [ + { + "category": { "@id": "#characteristic_category/title_71" }, + "value": { + "annotationValue": "Lib Title Brain 2 - C", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_79" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#characteristic_category/status_80" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_81" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [{ "@id": "#sample/31" }] + }, + { + "@id": "#other_material/49", + "name": "Lib Brain 3 - A", + "type": "library_name", + "characteristics": [ + { + "category": { "@id": "#characteristic_category/title_71" }, + "value": { + "annotationValue": "Lib Title Brain 3 - A", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_79" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#characteristic_category/status_80" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_81" + }, + "value": { + 
"annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [{ "@id": "#sample/21" }] + }, + { + "@id": "#other_material/50", + "name": "Lib Brain 3 - B", + "type": "library_name", + "characteristics": [ + { + "category": { "@id": "#characteristic_category/title_71" }, + "value": { + "annotationValue": "Lib Title Brain 3 - B", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_79" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#characteristic_category/status_80" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_81" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [{ "@id": "#sample/32" }] + }, + { + "@id": "#other_material/51", + "name": "Lib Brain 3 - C", + "type": "library_name", + "characteristics": [ + { + "category": { "@id": "#characteristic_category/title_71" }, + "value": { + "annotationValue": "Lib Title Brain 3 - C", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_79" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": 
"#characteristic_category/status_80" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_81" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [{ "@id": "#sample/33" }] + } + ] + }, + "processSequence": [ + { + "@id": "#process/library_construction/34", + "name": "", + "executesProtocol": { "@id": "#protocol/_9" }, + "parameterValues": [ + { + "category": { "@id": "#parameter/70" }, + "value": { + "annotationValue": "My super awesome protocol", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/72" }, + "value": { + "annotationValue": "Lib description Heart 1 - A", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/73" }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/74" }, + "value": { + "annotationValue": "WGS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/75" }, + "value": { + "annotationValue": "PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/76" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + 
}, + { + "category": { "@id": "#parameter/77" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { "@id": "#process/sample_collection/34" }, + "nextProcess": { "@id": "#process/nucleic_acid_sequencing/34" }, + "inputs": [{ "@id": "#sample/16" }], + "outputs": [{ "@id": "#sample/34" }] + }, + { + "@id": "#process/library_construction/35", + "name": "", + "executesProtocol": { "@id": "#protocol/_9" }, + "parameterValues": [ + { + "category": { "@id": "#parameter/70" }, + "value": { + "annotationValue": "My super awesome protocol", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/72" }, + "value": { + "annotationValue": "Lib description Heart 1 - B", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/73" }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/74" }, + "value": { + "annotationValue": "WGS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/75" }, + "value": { + "annotationValue": "PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/76" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/77" }, + "value": { + 
"annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { "@id": "#process/sample_collection/35" }, + "nextProcess": { "@id": "#process/nucleic_acid_sequencing/35" }, + "inputs": [{ "@id": "#sample/23" }], + "outputs": [{ "@id": "#sample/35" }] + }, + { + "@id": "#process/library_construction/36", + "name": "", + "executesProtocol": { "@id": "#protocol/_9" }, + "parameterValues": [ + { + "category": { "@id": "#parameter/70" }, + "value": { + "annotationValue": "My super awesome protocol", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/72" }, + "value": { + "annotationValue": "Lib description Heart 1 - C", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/73" }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/74" }, + "value": { + "annotationValue": "WGS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/75" }, + "value": { + "annotationValue": "PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/76" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/77" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": 
"" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { "@id": "#process/sample_collection/36" }, + "nextProcess": { "@id": "#process/nucleic_acid_sequencing/36" }, + "inputs": [{ "@id": "#sample/22" }], + "outputs": [{ "@id": "#sample/36" }] + }, + { + "@id": "#process/library_construction/37", + "name": "", + "executesProtocol": { "@id": "#protocol/_9" }, + "parameterValues": [ + { + "category": { "@id": "#parameter/70" }, + "value": { + "annotationValue": "My super awesome protocol", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/72" }, + "value": { + "annotationValue": "Lib description Heart 2 - A", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/73" }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/74" }, + "value": { + "annotationValue": "WGS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/75" }, + "value": { + "annotationValue": "PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/76" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/77" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", 
+ "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { "@id": "#process/sample_collection/37" }, + "nextProcess": { "@id": "#process/nucleic_acid_sequencing/37" }, + "inputs": [{ "@id": "#sample/17" }], + "outputs": [{ "@id": "#sample/37" }] + }, + { + "@id": "#process/library_construction/38", + "name": "", + "executesProtocol": { "@id": "#protocol/_9" }, + "parameterValues": [ + { + "category": { "@id": "#parameter/70" }, + "value": { + "annotationValue": "My super awesome protocol", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/72" }, + "value": { + "annotationValue": "Lib description Heart 2 - B", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/73" }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/74" }, + "value": { + "annotationValue": "WGS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/75" }, + "value": { + "annotationValue": "PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/76" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/77" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", 
+ "previousProcess": { "@id": "#process/sample_collection/38" }, + "nextProcess": { "@id": "#process/nucleic_acid_sequencing/38" }, + "inputs": [{ "@id": "#sample/24" }], + "outputs": [{ "@id": "#sample/38" }] + }, + { + "@id": "#process/library_construction/39", + "name": "", + "executesProtocol": { "@id": "#protocol/_9" }, + "parameterValues": [ + { + "category": { "@id": "#parameter/70" }, + "value": { + "annotationValue": "My super awesome protocol", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/72" }, + "value": { + "annotationValue": "Lib description Heart 2 - C", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/73" }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/74" }, + "value": { + "annotationValue": "WGS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/75" }, + "value": { + "annotationValue": "PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/76" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/77" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { "@id": "#process/sample_collection/39" 
}, + "nextProcess": { "@id": "#process/nucleic_acid_sequencing/39" }, + "inputs": [{ "@id": "#sample/25" }], + "outputs": [{ "@id": "#sample/39" }] + }, + { + "@id": "#process/library_construction/40", + "name": "", + "executesProtocol": { "@id": "#protocol/_9" }, + "parameterValues": [ + { + "category": { "@id": "#parameter/70" }, + "value": { + "annotationValue": "My super awesome protocol", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/72" }, + "value": { + "annotationValue": "Lib description Heart 3 - A", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/73" }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/74" }, + "value": { + "annotationValue": "WGS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/75" }, + "value": { + "annotationValue": "PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/76" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/77" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { "@id": "#process/sample_collection/40" }, + "nextProcess": { "@id": 
"#process/nucleic_acid_sequencing/40" }, + "inputs": [{ "@id": "#sample/18" }], + "outputs": [{ "@id": "#sample/40" }] + }, + { + "@id": "#process/library_construction/41", + "name": "", + "executesProtocol": { "@id": "#protocol/_9" }, + "parameterValues": [ + { + "category": { "@id": "#parameter/70" }, + "value": { + "annotationValue": "My super awesome protocol", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/72" }, + "value": { + "annotationValue": "Lib description Heart 3 - B", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/73" }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/74" }, + "value": { + "annotationValue": "WGS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/75" }, + "value": { + "annotationValue": "PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/76" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/77" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { "@id": "#process/sample_collection/41" }, + "nextProcess": { "@id": "#process/nucleic_acid_sequencing/41" }, + "inputs": [{ "@id": 
"#sample/26" }], + "outputs": [{ "@id": "#sample/41" }] + }, + { + "@id": "#process/library_construction/42", + "name": "", + "executesProtocol": { "@id": "#protocol/_9" }, + "parameterValues": [ + { + "category": { "@id": "#parameter/70" }, + "value": { + "annotationValue": "My super awesome protocol", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/72" }, + "value": { + "annotationValue": "Lib description Heart 3 - C", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/73" }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/74" }, + "value": { + "annotationValue": "WGS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/75" }, + "value": { + "annotationValue": "PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/76" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/77" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { "@id": "#process/sample_collection/42" }, + "nextProcess": { "@id": "#process/nucleic_acid_sequencing/42" }, + "inputs": [{ "@id": "#sample/27" }], + "outputs": [{ "@id": "#sample/42" }] + }, + 
{ + "@id": "#process/library_construction/43", + "name": "", + "executesProtocol": { "@id": "#protocol/_9" }, + "parameterValues": [ + { + "category": { "@id": "#parameter/70" }, + "value": { + "annotationValue": "My super awesome protocol", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/72" }, + "value": { + "annotationValue": "Lib description Brain 1 - A", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/73" }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/74" }, + "value": { + "annotationValue": "WGS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/75" }, + "value": { + "annotationValue": "PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/76" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/77" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { "@id": "#process/sample_collection/43" }, + "nextProcess": { "@id": "#process/nucleic_acid_sequencing/43" }, + "inputs": [{ "@id": "#sample/19" }], + "outputs": [{ "@id": "#sample/43" }] + }, + { + "@id": "#process/library_construction/44", + "name": "", + 
"executesProtocol": { "@id": "#protocol/_9" }, + "parameterValues": [ + { + "category": { "@id": "#parameter/70" }, + "value": { + "annotationValue": "My super awesome protocol", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/72" }, + "value": { + "annotationValue": "Lib description Brain 1 - B", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/73" }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/74" }, + "value": { + "annotationValue": "WGS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/75" }, + "value": { + "annotationValue": "PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/76" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/77" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { "@id": "#process/sample_collection/44" }, + "nextProcess": { "@id": "#process/nucleic_acid_sequencing/44" }, + "inputs": [{ "@id": "#sample/28" }], + "outputs": [{ "@id": "#sample/44" }] + }, + { + "@id": "#process/library_construction/45", + "name": "", + "executesProtocol": { "@id": "#protocol/_9" }, + 
"parameterValues": [ + { + "category": { "@id": "#parameter/70" }, + "value": { + "annotationValue": "My super awesome protocol", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/72" }, + "value": { + "annotationValue": "Lib description Brain 1 - C", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/73" }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/74" }, + "value": { + "annotationValue": "WGS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/75" }, + "value": { + "annotationValue": "PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/76" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/77" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { "@id": "#process/sample_collection/45" }, + "nextProcess": { "@id": "#process/nucleic_acid_sequencing/45" }, + "inputs": [{ "@id": "#sample/29" }], + "outputs": [{ "@id": "#sample/45" }] + }, + { + "@id": "#process/library_construction/46", + "name": "", + "executesProtocol": { "@id": "#protocol/_9" }, + "parameterValues": [ + { + "category": { "@id": "#parameter/70" 
}, + "value": { + "annotationValue": "My super awesome protocol", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/72" }, + "value": { + "annotationValue": "Lib description Brain 2 - A", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/73" }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/74" }, + "value": { + "annotationValue": "WGS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/75" }, + "value": { + "annotationValue": "PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/76" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/77" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { "@id": "#process/sample_collection/46" }, + "nextProcess": { "@id": "#process/nucleic_acid_sequencing/46" }, + "inputs": [{ "@id": "#sample/20" }], + "outputs": [{ "@id": "#sample/46" }] + }, + { + "@id": "#process/library_construction/47", + "name": "", + "executesProtocol": { "@id": "#protocol/_9" }, + "parameterValues": [ + { + "category": { "@id": "#parameter/70" }, + "value": { + "annotationValue": "My super awesome 
protocol", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/72" }, + "value": { + "annotationValue": "Lib description Brain 2 - B", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/73" }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/74" }, + "value": { + "annotationValue": "WGS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/75" }, + "value": { + "annotationValue": "PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/76" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/77" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { "@id": "#process/sample_collection/47" }, + "nextProcess": { "@id": "#process/nucleic_acid_sequencing/47" }, + "inputs": [{ "@id": "#sample/30" }], + "outputs": [{ "@id": "#sample/47" }] + }, + { + "@id": "#process/library_construction/48", + "name": "", + "executesProtocol": { "@id": "#protocol/_9" }, + "parameterValues": [ + { + "category": { "@id": "#parameter/70" }, + "value": { + "annotationValue": "My super awesome protocol", + "termSource": "", + "termAccession": "" + }, + 
"unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/72" }, + "value": { + "annotationValue": "Lib description Brain 2 - C", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/73" }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/74" }, + "value": { + "annotationValue": "WGS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/75" }, + "value": { + "annotationValue": "PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/76" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/77" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { "@id": "#process/sample_collection/48" }, + "nextProcess": { "@id": "#process/nucleic_acid_sequencing/48" }, + "inputs": [{ "@id": "#sample/31" }], + "outputs": [{ "@id": "#sample/48" }] + }, + { + "@id": "#process/library_construction/49", + "name": "", + "executesProtocol": { "@id": "#protocol/_9" }, + "parameterValues": [ + { + "category": { "@id": "#parameter/70" }, + "value": { + "annotationValue": "My super awesome protocol", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + 
"comments": [] + } + }, + { + "category": { "@id": "#parameter/72" }, + "value": { + "annotationValue": "Lib description Brain 3 - A", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/73" }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/74" }, + "value": { + "annotationValue": "WGS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/75" }, + "value": { + "annotationValue": "PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/76" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/77" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { "@id": "#process/sample_collection/49" }, + "nextProcess": { "@id": "#process/nucleic_acid_sequencing/49" }, + "inputs": [{ "@id": "#sample/21" }], + "outputs": [{ "@id": "#sample/49" }] + }, + { + "@id": "#process/library_construction/50", + "name": "", + "executesProtocol": { "@id": "#protocol/_9" }, + "parameterValues": [ + { + "category": { "@id": "#parameter/70" }, + "value": { + "annotationValue": "My super awesome protocol", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": 
"#parameter/72" }, + "value": { + "annotationValue": "Lib description Brain 3 - B", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/73" }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/74" }, + "value": { + "annotationValue": "WGS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/75" }, + "value": { + "annotationValue": "PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/76" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/77" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { "@id": "#process/sample_collection/50" }, + "nextProcess": { "@id": "#process/nucleic_acid_sequencing/50" }, + "inputs": [{ "@id": "#sample/32" }], + "outputs": [{ "@id": "#sample/50" }] + }, + { + "@id": "#process/library_construction/51", + "name": "", + "executesProtocol": { "@id": "#protocol/_9" }, + "parameterValues": [ + { + "category": { "@id": "#parameter/70" }, + "value": { + "annotationValue": "My super awesome protocol", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/72" }, + "value": { + "annotationValue": "Lib 
description Brain 3 - C", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/73" }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/74" }, + "value": { + "annotationValue": "WGS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/75" }, + "value": { + "annotationValue": "PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/76" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/77" }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { "@id": "#process/sample_collection/51" }, + "nextProcess": { "@id": "#process/nucleic_acid_sequencing/51" }, + "inputs": [{ "@id": "#sample/33" }], + "outputs": [{ "@id": "#sample/51" }] + }, + { + "@id": "#process/nucleic_acid_sequencing/52", + "name": "", + "executesProtocol": { "@id": "#protocol/_10" }, + "parameterValues": [ + { + "category": { "@id": "#parameter/84" }, + "value": { + "annotationValue": "Illumina", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/85" }, + "value": { + "annotationValue": "Illumina NovaSeq 6000", + "termSource": "", + "termAccession": "" + }, + "unit": 
{ + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { "@id": "#process/library_construction/52" }, + "nextProcess": {}, + "inputs": [ + { "@id": "#other_material/34" }, + { "@id": "#other_material/35" }, + { "@id": "#other_material/36" } + ], + "outputs": [{ "@id": "#sample/52" }] + }, + { + "@id": "#process/nucleic_acid_sequencing/53", + "name": "", + "executesProtocol": { "@id": "#protocol/_10" }, + "parameterValues": [ + { + "category": { "@id": "#parameter/84" }, + "value": { + "annotationValue": "Illumina", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/85" }, + "value": { + "annotationValue": "Illumina NovaSeq 6000", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { "@id": "#process/library_construction/53" }, + "nextProcess": {}, + "inputs": [ + { "@id": "#other_material/37" }, + { "@id": "#other_material/38" }, + { "@id": "#other_material/39" } + ], + "outputs": [{ "@id": "#sample/53" }] + }, + { + "@id": "#process/nucleic_acid_sequencing/54", + "name": "", + "executesProtocol": { "@id": "#protocol/_10" }, + "parameterValues": [ + { + "category": { "@id": "#parameter/84" }, + "value": { + "annotationValue": "Illumina", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/85" }, + "value": { + "annotationValue": "Illumina NovaSeq 6000", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { "@id": "#process/library_construction/54" }, + "nextProcess": {}, + "inputs": [ + { "@id": 
"#other_material/40" }, + { "@id": "#other_material/41" }, + { "@id": "#other_material/42" } + ], + "outputs": [{ "@id": "#sample/54" }] + }, + { + "@id": "#process/nucleic_acid_sequencing/55", + "name": "", + "executesProtocol": { "@id": "#protocol/_10" }, + "parameterValues": [ + { + "category": { "@id": "#parameter/84" }, + "value": { + "annotationValue": "Illumina", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/85" }, + "value": { + "annotationValue": "Illumina NovaSeq 6000", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { "@id": "#process/library_construction/55" }, + "nextProcess": {}, + "inputs": [ + { "@id": "#other_material/43" }, + { "@id": "#other_material/44" }, + { "@id": "#other_material/45" } + ], + "outputs": [{ "@id": "#sample/55" }] + }, + { + "@id": "#process/nucleic_acid_sequencing/56", + "name": "", + "executesProtocol": { "@id": "#protocol/_10" }, + "parameterValues": [ + { + "category": { "@id": "#parameter/84" }, + "value": { + "annotationValue": "Illumina", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/85" }, + "value": { + "annotationValue": "Illumina NovaSeq 6000", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { "@id": "#process/library_construction/56" }, + "nextProcess": {}, + "inputs": [ + { "@id": "#other_material/46" }, + { "@id": "#other_material/47" }, + { "@id": "#other_material/48" } + ], + "outputs": [{ "@id": "#sample/56" }] + }, + { + "@id": "#process/nucleic_acid_sequencing/57", + "name": "", + "executesProtocol": 
{ "@id": "#protocol/_10" }, + "parameterValues": [ + { + "category": { "@id": "#parameter/84" }, + "value": { + "annotationValue": "Illumina", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { "@id": "#parameter/85" }, + "value": { + "annotationValue": "Illumina NovaSeq 6000", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { "@id": "#process/library_construction/57" }, + "nextProcess": {}, + "inputs": [ + { "@id": "#other_material/49" }, + { "@id": "#other_material/50" }, + { "@id": "#other_material/51" } + ], + "outputs": [{ "@id": "#sample/57" }] + } + ], + "dataFiles": [ + { + "@id": "#data_file/52", + "name": "Datafile Dragon Heart 1.fastq", + "type": "Raw Data File", + "comments": [ + { "name": "file_type", "value": "fastq" }, + { "name": "file checksum", "value": "" }, + { "name": "submission date", "value": "" }, + { "name": "status", "value": "" }, + { "name": "accession", "value": "" } + ] + }, + { + "@id": "#data_file/53", + "name": "Datafile Dragon Heart 2.fastq", + "type": "Raw Data File", + "comments": [ + { "name": "file_type", "value": "fastq" }, + { "name": "file checksum", "value": "" }, + { "name": "submission date", "value": "" }, + { "name": "status", "value": "" }, + { "name": "accession", "value": "" } + ] + }, + { + "@id": "#data_file/54", + "name": "Datafile Dragon Heart 3.fastq", + "type": "Raw Data File", + "comments": [ + { "name": "file_type", "value": "fastq" }, + { "name": "file checksum", "value": "" }, + { "name": "submission date", "value": "" }, + { "name": "status", "value": "" }, + { "name": "accession", "value": "" } + ] + }, + { + "@id": "#data_file/55", + "name": "Datafile Dragon Brain 1.fastq", + "type": "Raw Data File", + "comments": [ + { "name": "file_type", "value": "fastq" }, + { "name": 
"file checksum", "value": "" }, + { "name": "submission date", "value": "" }, + { "name": "status", "value": "" }, + { "name": "accession", "value": "" } + ] + }, + { + "@id": "#data_file/56", + "name": "Datafile Dragon Brain 2.fastq", + "type": "Raw Data File", + "comments": [ + { "name": "file_type", "value": "fastq" }, + { "name": "file checksum", "value": "" }, + { "name": "submission date", "value": "" }, + { "name": "status", "value": "" }, + { "name": "accession", "value": "" } + ] + }, + { + "@id": "#data_file/57", + "name": "Datafile Dragon Brain 3.fastq", + "type": "Raw Data File", + "comments": [ + { "name": "file_type", "value": "fastq" }, + { "name": "file checksum", "value": "" }, + { "name": "submission date", "value": "" }, + { "name": "status", "value": "" }, + { "name": "accession", "value": "" } + ] + } + ], + "unitCategories": [] + } + ], + "factors": [], + "unitCategories": [] + } + ] +} From 572c49bac934e73599beae0ed3dbaec1acd7a9c9 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 22 Sep 2023 18:04:11 +0200 Subject: [PATCH 02/62] Updated isa_study.py: - Initiate IsaStudy directly from isa JSON file - Add typing system - Add Dict validation before creating the IsaStudy object --- read-isa-json/isa_objects/isa_study.py | 38 ++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/read-isa-json/isa_objects/isa_study.py b/read-isa-json/isa_objects/isa_study.py index c3a0c56..008d901 100644 --- a/read-isa-json/isa_objects/isa_study.py +++ b/read-isa-json/isa_objects/isa_study.py @@ -1,13 +1,45 @@ +from typing import List, Optional, Dict + + +def study_publication_ids(publication_isa_json) -> List[int]: + return [pub["id"] for pub in publication_isa_json] + + +def validate_isa_json(isa_json: Dict, key: str) -> None: + if not key in isa_json.keys(): + raise KeyError(f"{key} was not found in the provided ISA JSON.") + + class IsaStudy: def __init__( - self, alias, title, study_type, study_abstract, 
new_study_type=None + self, + alias: str, + title: str, + study_type: str, + study_abstract: str, + new_study_type=Optional[str], + pubmed_id=Optional[List[int]], ) -> None: self.alias = alias self.title = title self.study_type = study_type self.new_study_type = new_study_type self.study_abstract = study_abstract + self.pubmed_id = pubmed_id - @classmethod def from_isa_json(isa_json): - pass \ No newline at end of file + mandatory_keys = ["title", "description", "publications"] + [validate_isa_json(isa_json, key) for key in mandatory_keys] + + return [ + IsaStudy( + alias="", # TODO: Add SEEK URL of Study + title=study["title"], + study_type="", # TODO: Replace by Custom metadata of the Assay level + study_abstract=study["description"], + pubmed_id=study_publication_ids( + publication_isa_json=study["publications"] + ), + ) + for study in isa_json["studies"] + ] From 72e3c56d362e6911bf0be2111d14159204e063fc Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 22 Sep 2023 18:04:51 +0200 Subject: [PATCH 03/62] Update test file. 
--- read-isa-json/read_isa_json.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/read-isa-json/read_isa_json.py b/read-isa-json/read_isa_json.py index cbb81b4..1e05ed0 100644 --- a/read-isa-json/read_isa_json.py +++ b/read-isa-json/read_isa_json.py @@ -1,15 +1,11 @@ import json -from isa_objects import isa_study +from isa_objects.isa_study import IsaStudy from rich import print_json # Read json file -isa_json_file = open('test_isa_json_files/test_local_instance.json') +isa_json_file = open("read-isa-json/test_isa_json_files/test_local_instance.json") isa_json = json.load(isa_json_file) -# print_json(data = isa_json) -# Extracting the Study information -studies = isa_json['studies'] -print_json(data=studies) - -# study_information = isa_study.IsaStudy.from_isa_json(isa_json) \ No newline at end of file +study_information = IsaStudy.from_isa_json(isa_json) +print(study_information) From 511e0f76b1bd145405274ca800eb17b454415366 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 22 Sep 2023 20:20:53 +0200 Subject: [PATCH 04/62] Add method to dump object in a pandas DataFrame and renamed to Ena Object and Ena study --- .../isa_study.py => ena_objects/ena_study.py} | 11 +++++++++-- read-isa-json/read_isa_json.py | 7 ++++--- 2 files changed, 13 insertions(+), 5 deletions(-) rename read-isa-json/{isa_objects/isa_study.py => ena_objects/ena_study.py} (84%) diff --git a/read-isa-json/isa_objects/isa_study.py b/read-isa-json/ena_objects/ena_study.py similarity index 84% rename from read-isa-json/isa_objects/isa_study.py rename to read-isa-json/ena_objects/ena_study.py index 008d901..3411bd7 100644 --- a/read-isa-json/isa_objects/isa_study.py +++ b/read-isa-json/ena_objects/ena_study.py @@ -1,4 +1,5 @@ from typing import List, Optional, Dict +from pandas import DataFrame def study_publication_ids(publication_isa_json) -> List[int]: @@ -10,7 +11,7 @@ def validate_isa_json(isa_json: Dict, key: str) -> None: raise KeyError(f"{key} was 
not found in the provided ISA JSON.") -class IsaStudy: +class EnaStudy: def __init__( self, alias: str, @@ -32,7 +33,7 @@ def from_isa_json(isa_json): [validate_isa_json(isa_json, key) for key in mandatory_keys] return [ - IsaStudy( + EnaStudy( alias="", # TODO: Add SEEK URL of Study title=study["title"], study_type="", # TODO: Replace by Custom metadata of the Assay level @@ -43,3 +44,9 @@ def from_isa_json(isa_json): ) for study in isa_json["studies"] ] + + def to_dataframe(self) -> DataFrame: + """ + Dumps the study object in a pandas DataFrame of the object + """ + return DataFrame.from_dict(vars(self)) diff --git a/read-isa-json/read_isa_json.py b/read-isa-json/read_isa_json.py index 1e05ed0..c61c946 100644 --- a/read-isa-json/read_isa_json.py +++ b/read-isa-json/read_isa_json.py @@ -1,5 +1,5 @@ import json -from isa_objects.isa_study import IsaStudy +from ena_objects.ena_study import EnaStudy from rich import print_json @@ -7,5 +7,6 @@ isa_json_file = open("read-isa-json/test_isa_json_files/test_local_instance.json") isa_json = json.load(isa_json_file) -study_information = IsaStudy.from_isa_json(isa_json) -print(study_information) +studies = EnaStudy.from_isa_json(isa_json) +study_dfs = [study.to_dataframe() for study in studies] +print(study_dfs[0]) From 89675d6cb2c081a02cb34af14e1003286b6fdf48 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 22 Sep 2023 21:32:45 +0200 Subject: [PATCH 05/62] Update EnaStudy: - Add study alias - Add std module for common functions - Add docstrings --- read-isa-json/ena_objects/ena_std_lib.py | 39 ++++++++++++++++++ read-isa-json/ena_objects/ena_study.py | 52 +++++++++++++++++++----- read-isa-json/read_isa_json.py | 2 - 3 files changed, 81 insertions(+), 12 deletions(-) create mode 100644 read-isa-json/ena_objects/ena_std_lib.py diff --git a/read-isa-json/ena_objects/ena_std_lib.py b/read-isa-json/ena_objects/ena_std_lib.py new file mode 100644 index 0000000..aa2ce0d --- /dev/null +++ 
b/read-isa-json/ena_objects/ena_std_lib.py @@ -0,0 +1,39 @@ +from typing import Dict + + +def filter_attribute_by(attribute_list: str, key: str, value: str) -> Dict: + """Filters out the the attributes by key-value matching in the ISA JSON + + Args: + element (str): _description_ + key (str): _description_ + value (str): _description_ + + Example: + my_element = {"comments": [ + { "name": "SEEK Study ID", "value": "2" }, + { "name": "SEEK creation date", "value": "2023-09-22T06:14:34Z" } + ] + } + filter_attribute_by(element = my_element, key = 'name', value= 'SEEK Study ID') + + Output: { "name": "SEEK Study ID", "value": "2" } + + Returns: + Dict: The Dict that matches the criteria + """ + return [attribute for attribute in attribute_list if attribute[key] == value] + + +def validate_isa_json(isa_json: Dict, key: str) -> None: + """Raises an error if the structure of the ISA JSON Dict is not conform + + Args: + isa_json (Dict): The ISA JSON to validate + key (str): The key to check + + Raises: + KeyError: Will display the missing key in the Dict + """ + if not key in isa_json.keys(): + raise KeyError(f"{key} was not found in the provided ISA JSON.") diff --git a/read-isa-json/ena_objects/ena_study.py b/read-isa-json/ena_objects/ena_study.py index 3411bd7..37973e7 100644 --- a/read-isa-json/ena_objects/ena_study.py +++ b/read-isa-json/ena_objects/ena_study.py @@ -1,25 +1,47 @@ from typing import List, Optional, Dict from pandas import DataFrame +from ena_objects.ena_std_lib import filter_attribute_by, validate_isa_json -def study_publication_ids(publication_isa_json) -> List[int]: +def study_publication_ids(publication_isa_json: Dict) -> List[int]: + """Retrieves the pubmed_ids from the ISA JSON + + Args: + publication_isa_json (Dict): Publication part of the ISA JSON dictionary + + Returns: + List[int]: List of pubmed ID's + """ return [pub["id"] for pub in publication_isa_json] -def validate_isa_json(isa_json: Dict, key: str) -> None: - if not key in 
isa_json.keys(): - raise KeyError(f"{key} was not found in the provided ISA JSON.") +def study_alias(study_isa_json: str) -> str: + """Creates a study_alias, based on information of the study part of the ISA JSON. + + Args: + study_isa_json (str): Study part of the ISA JSON + + Returns: + str: the study_alias + """ + prefix = "https://datahub.elixir-belgium.org/studies/" # TODO: Replace by something less hard-coded + seek_study_id: str = filter_attribute_by( + study_isa_json["comments"], key="name", value="SEEK Study ID" + )[0]["value"] + return prefix + seek_study_id class EnaStudy: + """Generates a Study study object, compliant to the requirements of ENA""" + def __init__( self, alias: str, title: str, study_type: str, study_abstract: str, - new_study_type=Optional[str], - pubmed_id=Optional[List[int]], + new_study_type: Optional[str] = None, + pubmed_id: Optional[List[int]] = None, ) -> None: self.alias = alias self.title = title @@ -28,13 +50,21 @@ def __init__( self.study_abstract = study_abstract self.pubmed_id = pubmed_id - def from_isa_json(isa_json): + def from_isa_json(isa_json: Dict): + """Method that creates an EnaStudy with params from ISA JSON Dictionary + + Args: + isa_json: ISA JSON Dict + + Returns: + EnaStudy: EnaStudy object + """ mandatory_keys = ["title", "description", "publications"] [validate_isa_json(isa_json, key) for key in mandatory_keys] return [ EnaStudy( - alias="", # TODO: Add SEEK URL of Study + alias=study_alias(study), title=study["title"], study_type="", # TODO: Replace by Custom metadata of the Assay level study_abstract=study["description"], @@ -46,7 +76,9 @@ def from_isa_json(isa_json): ] def to_dataframe(self) -> DataFrame: - """ - Dumps the study object in a pandas DataFrame of the object + """Dumps the study object in a pandas DataFrame of the object + + Returns: + DataFrame: Pandas DataFrame representation of the Study """ return DataFrame.from_dict(vars(self)) diff --git a/read-isa-json/read_isa_json.py 
b/read-isa-json/read_isa_json.py index c61c946..8c385ab 100644 --- a/read-isa-json/read_isa_json.py +++ b/read-isa-json/read_isa_json.py @@ -1,8 +1,6 @@ import json from ena_objects.ena_study import EnaStudy -from rich import print_json - # Read json file isa_json_file = open("read-isa-json/test_isa_json_files/test_local_instance.json") isa_json = json.load(isa_json_file) From 8ec21fd8ff21f84061047efa0b51effa6462ef5a Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Sun, 24 Sep 2023 18:26:18 +0200 Subject: [PATCH 06/62] Add other Classes --- read-isa-json/ena_objects/ena_experiment.py | 39 +++++++++++++++++++++ read-isa-json/ena_objects/ena_run.py | 20 +++++++++++ read-isa-json/ena_objects/ena_sample.py | 19 ++++++++++ read-isa-json/ena_objects/ena_study.py | 2 +- read-isa-json/ena_objects/ena_submission.py | 11 ++++++ 5 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 read-isa-json/ena_objects/ena_experiment.py create mode 100644 read-isa-json/ena_objects/ena_run.py create mode 100644 read-isa-json/ena_objects/ena_sample.py create mode 100644 read-isa-json/ena_objects/ena_submission.py diff --git a/read-isa-json/ena_objects/ena_experiment.py b/read-isa-json/ena_objects/ena_experiment.py new file mode 100644 index 0000000..5572f40 --- /dev/null +++ b/read-isa-json/ena_objects/ena_experiment.py @@ -0,0 +1,39 @@ +from typing import List, Dict +from ena_objects.ena_study import EnaStudy +from ena_objects.ena_sample import EnaSample + + +class EnaExperiment: + """ + Generates an Experiment object, compliant to the requirements of ENA + """ + + def __init__( + self, + alias: str, + title: str, + study: EnaStudy, + sample: EnaSample, + library_name: str, + ibrary_strategy, + library_source: str, + library_selection: str, + library_layout: str, + insert_size: str, + library_construction_protocol: str, + platform: str, + instrument_model: str, + ) -> None: + self.alias = alias + self.title = title + self.study = study + self.sample = sample + 
self.library_name = library_name + self.ibrary_strategy = ibrary_strategy + self.library_source = library_source + self.library_selection = library_selection + self.library_layout = library_layout + self.insert_size = insert_size + self.library_construction_protocol = library_construction_protocol + self.platform = platform + self.instrument_model = instrument_model diff --git a/read-isa-json/ena_objects/ena_run.py b/read-isa-json/ena_objects/ena_run.py new file mode 100644 index 0000000..148c5da --- /dev/null +++ b/read-isa-json/ena_objects/ena_run.py @@ -0,0 +1,20 @@ +from typing import List, Dict +from ena_objects.ena_experiment import EnaExperiment + + +class EnaRun: + """ + Generates a Run object, compliant to the requirements of ENA + """ + + def __init__( + self, + alias: str, + experiments: List[EnaExperiment], + filename: str, + file_type: str, + ) -> None: + self.alias = alias + self.experiments = experiments + self.filename = filename + self.file_type = file_type diff --git a/read-isa-json/ena_objects/ena_sample.py b/read-isa-json/ena_objects/ena_sample.py new file mode 100644 index 0000000..09237db --- /dev/null +++ b/read-isa-json/ena_objects/ena_sample.py @@ -0,0 +1,19 @@ +from typing import List, Dict + + +class EnaSample: + """ + Generates an Sample object, compliant to the requirements of ENA + """ + + def __init__( + self, + alias: str, + status: str, + taxon_id: int, + sample_description: str, + ) -> None: + self.alias = alias + self.status = status + self.taxon_id = taxon_id + self.sample_description = sample_description diff --git a/read-isa-json/ena_objects/ena_study.py b/read-isa-json/ena_objects/ena_study.py index 37973e7..7777dfe 100644 --- a/read-isa-json/ena_objects/ena_study.py +++ b/read-isa-json/ena_objects/ena_study.py @@ -32,7 +32,7 @@ def study_alias(study_isa_json: str) -> str: class EnaStudy: - """Generates a Study study object, compliant to the requirements of ENA""" + """Generates a Study object, compliant to the requirements of 
ENA""" def __init__( self, diff --git a/read-isa-json/ena_objects/ena_submission.py b/read-isa-json/ena_objects/ena_submission.py new file mode 100644 index 0000000..d7eb76f --- /dev/null +++ b/read-isa-json/ena_objects/ena_submission.py @@ -0,0 +1,11 @@ +from typing import List, Dict +from ena_objects.ena_run import EnaRun + + +class EnaSubmission: + """ + Generates a Submission object, compliant to the requirements of ENA + """ + + def __init__(self, runs: List[EnaRun]) -> None: + self.runs = runs From 057df1f1576b33b4d196bdb9b5f29fb8858bc14e Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Mon, 25 Sep 2023 11:50:10 +0200 Subject: [PATCH 07/62] Replaced test ISA Json by one without pooled samples --- .../isa_json_test_investigation.json | 2536 ++++++++++ .../test_local_instance.json | 4427 ----------------- 2 files changed, 2536 insertions(+), 4427 deletions(-) create mode 100644 read-isa-json/test_isa_json_files/isa_json_test_investigation.json delete mode 100644 read-isa-json/test_isa_json_files/test_local_instance.json diff --git a/read-isa-json/test_isa_json_files/isa_json_test_investigation.json b/read-isa-json/test_isa_json_files/isa_json_test_investigation.json new file mode 100644 index 0000000..888fb44 --- /dev/null +++ b/read-isa-json/test_isa_json_files/isa_json_test_investigation.json @@ -0,0 +1,2536 @@ +{ + "identifier": "", + "title": "ENA upload investigation", + "description": "", + "submissionDate": "", + "publicReleaseDate": "", + "ontologySourceReferences": [], + "filename": "ENA upload investigation.txt", + "comments": [ + { + "name": "ISAjson export time", + "value": "2023-09-25T09:47:35Z" + }, + { + "name": "SEEK Project name", + "value": "ENA Upload Project" + }, + { + "name": "SEEK Project ID", + "value": "http://localhost:3000/single_pages/3" + }, + { + "name": "SEEK Investigation ID", + "value": "2" + } + ], + "publications": [], + "people": [ + { + "@id": "#people/1", + "lastName": "De Pelseneer", + "firstName": "Kevin", + 
"midInitials": "", + "email": "jklhndvkjdsbvs@mail.com", + "phone": "", + "fax": "", + "address": "", + "affiliation": "", + "roles": [ + { + "termAccession": "", + "termSource": "", + "annotationValue": "" + } + ], + "comments": [ + { + "@id": "", + "value": "", + "name": "" + } + ] + } + ], + "studies": [ + { + "identifier": "", + "title": "ENA Upload Study", + "description": "", + "submissionDate": "", + "publicReleaseDate": "", + "filename": "ENA Upload Study.txt", + "comments": [ + { + "name": "SEEK Study ID", + "value": "2" + }, + { + "name": "SEEK creation date", + "value": "2023-09-22T06:14:34Z" + } + ], + "publications": [ + { + "id": 1, + "pubmed_id": 666, + "title": "The number of the beast", + "abstract": "", + "published_date": "2023-09-22", + "journal": "", + "first_letter": "T", + "contributor_id": 1, + "created_at": "2023-09-22T06:10:30.000Z", + "updated_at": "2023-09-22T06:10:30.000Z", + "doi": null, + "uuid": "a800ee30-3b3c-013c-36ae-2c91a17e7bb9", + "policy_id": 43, + "citation": "", + "deleted_contributor": null, + "registered_mode": 3, + "booktitle": "", + "publisher": "", + "editor": "yo mama", + "publication_type_id": 1, + "url": "", + "version": 1, + "license": null, + "other_creators": null + } + ], + "people": [ + { + "@id": "#people/1", + "lastName": "De Pelseneer", + "firstName": "Kevin", + "midInitials": "", + "email": "jklhndvkjdsbvs@mail.com", + "phone": "", + "fax": "", + "address": "", + "affiliation": "", + "roles": [ + { + "termAccession": "", + "termSource": "", + "annotationValue": "" + } + ], + "comments": [ + { + "@id": "", + "value": "", + "name": "" + } + ] + } + ], + "studyDesignDescriptors": [], + "characteristicCategories": [ + { + "@id": "#characteristic_category/title_62", + "characteristicType": { + "annotationValue": "title", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/description_63", + "characteristicType": { + "annotationValue": "description", + "termAccession": "", + 
"termSource": "" + } + }, + { + "@id": "#characteristic_category/taxon_id_64", + "characteristicType": { + "annotationValue": "taxon_id", + "termAccession": "", + "termSource": "" + } + } + ], + "materials": { + "sources": [ + { + "@id": "#source/13", + "name": "Source 1", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_62" + }, + "value": { + "annotationValue": "Source awsome dragon 1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/description_63" + }, + "value": { + "annotationValue": "This is my first source. Be gentle!", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/taxon_id_64" + }, + "value": { + "annotationValue": "103695", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#source/14", + "name": "Source 2", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_62" + }, + "value": { + "annotationValue": "Source awsome dragon 2", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/description_63" + }, + "value": { + "annotationValue": "This is the second source. 
This is my control group!", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/taxon_id_64" + }, + "value": { + "annotationValue": "103695", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#source/15", + "name": "Source 3", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_62" + }, + "value": { + "annotationValue": "Source awsome dragon 3", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/description_63" + }, + "value": { + "annotationValue": "This is the third source. Meh! I have two others...", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/taxon_id_64" + }, + "value": { + "annotationValue": "103695", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + } + ], + "samples": [ + { + "@id": "#sample/16", + "name": "Dragon heart source 1 - A", + "derivesFrom": [ + { + "@id": "#source/13" + } + ], + "characteristics": [], + "factorValues": [ + { + "category": { + "@id": "" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/17", + "name": "Dragon heart source 2 - A", + "derivesFrom": [ + { + "@id": "#source/14" + } + ], + "characteristics": [], + "factorValues": [ + { + "category": { + "@id": "" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", 
+ "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/18", + "name": "Dragon heart source 3 - A", + "derivesFrom": [ + { + "@id": "#source/15" + } + ], + "characteristics": [], + "factorValues": [ + { + "category": { + "@id": "" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/19", + "name": "Dragon brain source 1 - A", + "derivesFrom": [ + { + "@id": "#source/13" + } + ], + "characteristics": [], + "factorValues": [ + { + "category": { + "@id": "" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/20", + "name": "Dragon brain source 2 - A", + "derivesFrom": [ + { + "@id": "#source/14" + } + ], + "characteristics": [], + "factorValues": [ + { + "category": { + "@id": "" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/21", + "name": "Dragon brain source 3 - A", + "derivesFrom": [ + { + "@id": "#source/15" + } + ], + "characteristics": [], + "factorValues": [ + { + "category": { + "@id": "" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + } + ] + }, + "protocols": [ + { + "@id": "#protocol/_2", + "name": "sample collection", + "protocolType": { + "annotationValue": "sample collection", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + }, + { + "@id": 
"#protocol/_9", + "name": "library construction", + "protocolType": { + "annotationValue": "library construction", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [ + { + "@id": "#parameter/70", + "parameterName": { + "annotationValue": "library_construction_protocol", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/72", + "parameterName": { + "annotationValue": "design_description", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/73", + "parameterName": { + "annotationValue": "library_source", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/74", + "parameterName": { + "annotationValue": "library_strategy", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/75", + "parameterName": { + "annotationValue": "library_selection", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/76", + "parameterName": { + "annotationValue": "library_layout", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/77", + "parameterName": { + "annotationValue": "insert_size", + "termAccession": "", + "termSource": "" + } + } + ], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + }, + { + "@id": "#protocol/_10", + "name": "nucleic acid sequencing", + "protocolType": { + "annotationValue": "nucleic acid sequencing", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [ + { + "@id": "#parameter/84", + "parameterName": { + "annotationValue": "platform", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/85", + "parameterName": { + "annotationValue": "instrument_model", + "termAccession": "", + "termSource": "" + } + } + ], + "components": [ + { + "componentName": "", + "componentType": { + 
"annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + } + ], + "processSequence": [ + { + "@id": "#process/sample_collection/16", + "name": "", + "executesProtocol": { + "@id": "#protocol/_2" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [ + { + "@id": "#source/13" + } + ], + "outputs": [ + { + "@id": "#sample/16" + } + ] + }, + { + "@id": "#process/sample_collection/17", + "name": "", + "executesProtocol": { + "@id": "#protocol/_2" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [ + { + "@id": "#source/14" + } + ], + "outputs": [ + { + "@id": "#sample/17" + } + ] + }, + { + "@id": "#process/sample_collection/18", + "name": "", + "executesProtocol": { + "@id": "#protocol/_2" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [ + { + "@id": "#source/15" + } + ], + "outputs": [ + { + "@id": "#sample/18" + } + ] + }, + { + "@id": "#process/sample_collection/19", + "name": "", + "executesProtocol": { + "@id": "#protocol/_2" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [ + { + "@id": "#source/13" + } + ], + "outputs": [ + { + "@id": "#sample/19" + } + ] + }, + { + "@id": "#process/sample_collection/20", + "name": "", + "executesProtocol": { + "@id": "#protocol/_2" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [ + { + "@id": "#source/14" + } + ], + "outputs": [ + { + "@id": "#sample/20" + } + ] + }, + { + "@id": "#process/sample_collection/21", + "name": "", + "executesProtocol": { + "@id": "#protocol/_2" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [ + { + "@id": "#source/15" + } + ], + "outputs": [ + { + 
"@id": "#sample/21" + } + ] + } + ], + "assays": [ + { + "@id": "#assay/9_10", + "filename": "a_assays.txt", + "measurementType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "technologyType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "technologyPlatform": "", + "characteristicCategories": [ + { + "@id": "#characteristic_category/title_71", + "characteristicType": { + "annotationValue": "title", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/submission_date_79", + "characteristicType": { + "annotationValue": "submission date", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/status_80", + "characteristicType": { + "annotationValue": "status", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/accession_81", + "characteristicType": { + "annotationValue": "accession", + "termAccession": "", + "termSource": "" + } + } + ], + "materials": { + "samples": [], + "otherMaterials": [ + { + "@id": "#other_material/34", + "name": "Lib Heart 1 - A", + "type": "library_name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_71" + }, + "value": { + "annotationValue": "Lib Title Heart 1 - A", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_79" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_80" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_81" + 
}, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/16" + } + ] + }, + { + "@id": "#other_material/37", + "name": "Lib Heart 2 - A", + "type": "library_name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_71" + }, + "value": { + "annotationValue": "Lib Title Heart 2 - A", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_79" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_80" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_81" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/17" + } + ] + }, + { + "@id": "#other_material/40", + "name": "Lib Heart 3 - A", + "type": "library_name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_71" + }, + "value": { + "annotationValue": "Lib Title Heart 3 - A", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_79" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + 
}, + { + "category": { + "@id": "#characteristic_category/status_80" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_81" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/18" + } + ] + }, + { + "@id": "#other_material/43", + "name": "Lib Brain 1 - A", + "type": "library_name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_71" + }, + "value": { + "annotationValue": "Lib Title Brain 1 - A", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_79" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_80" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_81" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/19" + } + ] + }, + { + "@id": "#other_material/46", + "name": "Lib Brain 2 - A", + "type": "library_name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_71" + }, + "value": { + "annotationValue": "Lib Title Brain 2 - A", + "termSource": "", + "termAccession": "" + }, + "unit": { + 
"termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_79" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_80" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_81" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/20" + } + ] + }, + { + "@id": "#other_material/49", + "name": "Lib Brain 3 - A", + "type": "library_name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_71" + }, + "value": { + "annotationValue": "Lib Title Brain 3 - A", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_79" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_80" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_81" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/21" + } + 
] + } + ] + }, + "processSequence": [ + { + "@id": "#process/library_construction/34", + "name": "", + "executesProtocol": { + "@id": "#protocol/_9" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/70" + }, + "value": { + "annotationValue": "My super awesome protocol", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/72" + }, + "value": { + "annotationValue": "Lib description Heart 1 - A", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/73" + }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/74" + }, + "value": { + "annotationValue": "WGS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/75" + }, + "value": { + "annotationValue": "PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/76" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/77" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/sample_collection/34" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/34" + }, + "inputs": [ + { + "@id": "#sample/16" + } + ], + "outputs": [ + { 
+ "@id": "#sample/34" + } + ] + }, + { + "@id": "#process/library_construction/37", + "name": "", + "executesProtocol": { + "@id": "#protocol/_9" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/70" + }, + "value": { + "annotationValue": "My super awesome protocol", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/72" + }, + "value": { + "annotationValue": "Lib description Heart 2 - A", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/73" + }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/74" + }, + "value": { + "annotationValue": "WGS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/75" + }, + "value": { + "annotationValue": "PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/76" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/77" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/sample_collection/37" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/37" + }, + "inputs": [ + { + "@id": "#sample/17" + } + ], + "outputs": [ + { + 
"@id": "#sample/37" + } + ] + }, + { + "@id": "#process/library_construction/40", + "name": "", + "executesProtocol": { + "@id": "#protocol/_9" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/70" + }, + "value": { + "annotationValue": "My super awesome protocol", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/72" + }, + "value": { + "annotationValue": "Lib description Heart 3 - A", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/73" + }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/74" + }, + "value": { + "annotationValue": "WGS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/75" + }, + "value": { + "annotationValue": "PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/76" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/77" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/sample_collection/40" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/40" + }, + "inputs": [ + { + "@id": "#sample/18" + } + ], + "outputs": [ + { + 
"@id": "#sample/40" + } + ] + }, + { + "@id": "#process/library_construction/43", + "name": "", + "executesProtocol": { + "@id": "#protocol/_9" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/70" + }, + "value": { + "annotationValue": "My super awesome protocol", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/72" + }, + "value": { + "annotationValue": "Lib description Brain 1 - A", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/73" + }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/74" + }, + "value": { + "annotationValue": "WGS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/75" + }, + "value": { + "annotationValue": "PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/76" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/77" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/sample_collection/43" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/43" + }, + "inputs": [ + { + "@id": "#sample/19" + } + ], + "outputs": [ + { + 
"@id": "#sample/43" + } + ] + }, + { + "@id": "#process/library_construction/46", + "name": "", + "executesProtocol": { + "@id": "#protocol/_9" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/70" + }, + "value": { + "annotationValue": "My super awesome protocol", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/72" + }, + "value": { + "annotationValue": "Lib description Brain 2 - A", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/73" + }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/74" + }, + "value": { + "annotationValue": "WGS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/75" + }, + "value": { + "annotationValue": "PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/76" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/77" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/sample_collection/46" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/46" + }, + "inputs": [ + { + "@id": "#sample/20" + } + ], + "outputs": [ + { + 
"@id": "#sample/46" + } + ] + }, + { + "@id": "#process/library_construction/49", + "name": "", + "executesProtocol": { + "@id": "#protocol/_9" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/70" + }, + "value": { + "annotationValue": "My super awesome protocol", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/72" + }, + "value": { + "annotationValue": "Lib description Brain 3 - A", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/73" + }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/74" + }, + "value": { + "annotationValue": "WGS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/75" + }, + "value": { + "annotationValue": "PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/76" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/77" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/sample_collection/49" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/49" + }, + "inputs": [ + { + "@id": "#sample/21" + } + ], + "outputs": [ + { + 
"@id": "#sample/49" + } + ] + }, + { + "@id": "#process/nucleic_acid_sequencing/52", + "name": "", + "executesProtocol": { + "@id": "#protocol/_10" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/84" + }, + "value": { + "annotationValue": "Illumina", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/85" + }, + "value": { + "annotationValue": "Illumina NovaSeq 6000", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/library_construction/52" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/34" + } + ], + "outputs": [ + { + "@id": "#sample/52" + } + ] + }, + { + "@id": "#process/nucleic_acid_sequencing/53", + "name": "", + "executesProtocol": { + "@id": "#protocol/_10" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/84" + }, + "value": { + "annotationValue": "Illumina", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/85" + }, + "value": { + "annotationValue": "Illumina NovaSeq 6000", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/library_construction/53" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/37" + } + ], + "outputs": [ + { + "@id": "#sample/53" + } + ] + }, + { + "@id": "#process/nucleic_acid_sequencing/54", + "name": "", + "executesProtocol": { + "@id": "#protocol/_10" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/84" + }, + "value": { + "annotationValue": "Illumina", + "termSource": "", + 
"termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/85" + }, + "value": { + "annotationValue": "Illumina NovaSeq 6000", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/library_construction/54" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/40" + } + ], + "outputs": [ + { + "@id": "#sample/54" + } + ] + }, + { + "@id": "#process/nucleic_acid_sequencing/55", + "name": "", + "executesProtocol": { + "@id": "#protocol/_10" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/84" + }, + "value": { + "annotationValue": "Illumina", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/85" + }, + "value": { + "annotationValue": "Illumina NovaSeq 6000", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/library_construction/55" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/43" + } + ], + "outputs": [ + { + "@id": "#sample/55" + } + ] + }, + { + "@id": "#process/nucleic_acid_sequencing/56", + "name": "", + "executesProtocol": { + "@id": "#protocol/_10" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/84" + }, + "value": { + "annotationValue": "Illumina", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/85" + }, + "value": { + "annotationValue": "Illumina NovaSeq 6000", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + 
"termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/library_construction/56" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/46" + } + ], + "outputs": [ + { + "@id": "#sample/56" + } + ] + }, + { + "@id": "#process/nucleic_acid_sequencing/57", + "name": "", + "executesProtocol": { + "@id": "#protocol/_10" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/84" + }, + "value": { + "annotationValue": "Illumina", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/85" + }, + "value": { + "annotationValue": "Illumina NovaSeq 6000", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/library_construction/57" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/49" + } + ], + "outputs": [ + { + "@id": "#sample/57" + } + ] + } + ], + "dataFiles": [ + { + "@id": "#data_file/52", + "name": "Datafile Dragon Heart 1.fastq", + "type": "Raw Data File", + "comments": [ + { + "name": "file_type", + "value": "fastq" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + }, + { + "name": "accession", + "value": "" + } + ] + }, + { + "@id": "#data_file/53", + "name": "Datafile Dragon Heart 2.fastq", + "type": "Raw Data File", + "comments": [ + { + "name": "file_type", + "value": "fastq" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + }, + { + "name": "accession", + "value": "" + } + ] + }, + { + "@id": "#data_file/54", + "name": "Datafile Dragon Heart 3.fastq", + "type": "Raw Data File", + "comments": 
[ + { + "name": "file_type", + "value": "fastq" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + }, + { + "name": "accession", + "value": "" + } + ] + }, + { + "@id": "#data_file/55", + "name": "Datafile Dragon Brain 1.fastq", + "type": "Raw Data File", + "comments": [ + { + "name": "file_type", + "value": "fastq" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + }, + { + "name": "accession", + "value": "" + } + ] + }, + { + "@id": "#data_file/56", + "name": "Datafile Dragon Brain 2.fastq", + "type": "Raw Data File", + "comments": [ + { + "name": "file_type", + "value": "fastq" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + }, + { + "name": "accession", + "value": "" + } + ] + }, + { + "@id": "#data_file/57", + "name": "Datafile Dragon Brain 3.fastq", + "type": "Raw Data File", + "comments": [ + { + "name": "file_type", + "value": "fastq" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + }, + { + "name": "accession", + "value": "" + } + ] + } + ], + "unitCategories": [] + } + ], + "factors": [], + "unitCategories": [] + } + ] +} \ No newline at end of file diff --git a/read-isa-json/test_isa_json_files/test_local_instance.json b/read-isa-json/test_isa_json_files/test_local_instance.json deleted file mode 100644 index d2e1b46..0000000 --- a/read-isa-json/test_isa_json_files/test_local_instance.json +++ /dev/null @@ -1,4427 +0,0 @@ -{ - "identifier": "", - "title": "ENA upload investigation", - "description": "", - "submissionDate": "", - "publicReleaseDate": "", - "ontologySourceReferences": [], - "filename": "ENA upload investigation.txt", - "comments": [ - { "name": "ISAjson 
export time", "value": "2023-09-22T07:31:31Z" }, - { "name": "SEEK Project name", "value": "ENA Upload Project" }, - { - "name": "SEEK Project ID", - "value": "http://localhost:3000/single_pages/3" - }, - { "name": "SEEK Investigation ID", "value": "2" } - ], - "publications": [], - "people": [ - { - "@id": "#people/1", - "lastName": "De Pelseneer", - "firstName": "Kevin", - "midInitials": "", - "email": "jklhndvkjdsbvs@mail.com", - "phone": "", - "fax": "", - "address": "", - "affiliation": "", - "roles": [ - { "termAccession": "", "termSource": "", "annotationValue": "" } - ], - "comments": [{ "@id": "", "value": "", "name": "" }] - } - ], - "studies": [ - { - "identifier": "", - "title": "ENA Upload Study", - "description": "", - "submissionDate": "", - "publicReleaseDate": "", - "filename": "ENA Upload Study.txt", - "comments": [ - { "name": "SEEK Study ID", "value": "2" }, - { "name": "SEEK creation date", "value": "2023-09-22T06:14:34Z" } - ], - "publications": [ - { - "id": 1, - "pubmed_id": 666, - "title": "The number of the beast", - "abstract": "", - "published_date": "2023-09-22", - "journal": "", - "first_letter": "T", - "contributor_id": 1, - "created_at": "2023-09-22T06:10:30.000Z", - "updated_at": "2023-09-22T06:10:30.000Z", - "doi": null, - "uuid": "a800ee30-3b3c-013c-36ae-2c91a17e7bb9", - "policy_id": 43, - "citation": "", - "deleted_contributor": null, - "registered_mode": 3, - "booktitle": "", - "publisher": "", - "editor": "yo mama", - "publication_type_id": 1, - "url": "", - "version": 1, - "license": null, - "other_creators": null - } - ], - "people": [ - { - "@id": "#people/1", - "lastName": "De Pelseneer", - "firstName": "Kevin", - "midInitials": "", - "email": "jklhndvkjdsbvs@mail.com", - "phone": "", - "fax": "", - "address": "", - "affiliation": "", - "roles": [ - { "termAccession": "", "termSource": "", "annotationValue": "" } - ], - "comments": [{ "@id": "", "value": "", "name": "" }] - } - ], - "studyDesignDescriptors": [], - 
"characteristicCategories": [ - { - "@id": "#characteristic_category/title_62", - "characteristicType": { - "annotationValue": "title", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#characteristic_category/description_63", - "characteristicType": { - "annotationValue": "description", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#characteristic_category/taxon_id_64", - "characteristicType": { - "annotationValue": "taxon_id", - "termAccession": "", - "termSource": "" - } - } - ], - "materials": { - "sources": [ - { - "@id": "#source/13", - "name": "Source 1", - "characteristics": [ - { - "category": { "@id": "#characteristic_category/title_62" }, - "value": { - "annotationValue": "Source awsome dragon 1", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/description_63" - }, - "value": { - "annotationValue": "This is my first source. Be gentle!", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#characteristic_category/taxon_id_64" }, - "value": { - "annotationValue": "103695", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - }, - { - "@id": "#source/14", - "name": "Source 2", - "characteristics": [ - { - "category": { "@id": "#characteristic_category/title_62" }, - "value": { - "annotationValue": "Source awsome dragon 2", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/description_63" - }, - "value": { - "annotationValue": "This is the second source. 
This is my control group!", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#characteristic_category/taxon_id_64" }, - "value": { - "annotationValue": "103695", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - }, - { - "@id": "#source/15", - "name": "Source 3", - "characteristics": [ - { - "category": { "@id": "#characteristic_category/title_62" }, - "value": { - "annotationValue": "Source awsome dragon 3", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/description_63" - }, - "value": { - "annotationValue": "This is the third source. Meh! I have two others...", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#characteristic_category/taxon_id_64" }, - "value": { - "annotationValue": "103695", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - } - ], - "samples": [ - { - "@id": "#sample/16", - "name": "Dragon heart source 1 - A", - "derivesFrom": [{ "@id": "#source/13" }], - "characteristics": [], - "factorValues": [ - { - "category": { "@id": "" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - }, - { - "@id": "#sample/17", - "name": "Dragon heart source 2 -A", - "derivesFrom": [{ "@id": "#source/14" }], - "characteristics": [], - "factorValues": [ - { - "category": { "@id": "" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - 
} - } - ] - }, - { - "@id": "#sample/18", - "name": "Dragon heart source 3 - A", - "derivesFrom": [{ "@id": "#source/15" }], - "characteristics": [], - "factorValues": [ - { - "category": { "@id": "" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - }, - { - "@id": "#sample/19", - "name": "Dragon brain source 1 - A", - "derivesFrom": [{ "@id": "#source/13" }], - "characteristics": [], - "factorValues": [ - { - "category": { "@id": "" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - }, - { - "@id": "#sample/20", - "name": "Dragon brain source 2 - A", - "derivesFrom": [{ "@id": "#source/14" }], - "characteristics": [], - "factorValues": [ - { - "category": { "@id": "" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - }, - { - "@id": "#sample/21", - "name": "Dragon brain source 3 - A", - "derivesFrom": [{ "@id": "#source/15" }], - "characteristics": [], - "factorValues": [ - { - "category": { "@id": "" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - }, - { - "@id": "#sample/22", - "name": "Dragon heart source 1 - C", - "derivesFrom": [{ "@id": "#source/13" }], - "characteristics": [], - "factorValues": [ - { - "category": { "@id": "" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - }, - { - "@id": "#sample/23", - "name": "Dragon heart source 1 - B", - "derivesFrom": [{ "@id": "#source/13" }], - "characteristics": [], - "factorValues": [ - { - "category": { 
"@id": "" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - }, - { - "@id": "#sample/24", - "name": "Dragon heart source 2 - B", - "derivesFrom": [{ "@id": "#source/14" }], - "characteristics": [], - "factorValues": [ - { - "category": { "@id": "" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - }, - { - "@id": "#sample/25", - "name": "Dragon heart source 2 - C", - "derivesFrom": [{ "@id": "#source/14" }], - "characteristics": [], - "factorValues": [ - { - "category": { "@id": "" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - }, - { - "@id": "#sample/26", - "name": "Dragon heart source 3 - B", - "derivesFrom": [{ "@id": "#source/15" }], - "characteristics": [], - "factorValues": [ - { - "category": { "@id": "" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - }, - { - "@id": "#sample/27", - "name": "Dragon heart source 3 - C", - "derivesFrom": [{ "@id": "#source/15" }], - "characteristics": [], - "factorValues": [ - { - "category": { "@id": "" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - }, - { - "@id": "#sample/28", - "name": "Dragon brain source 1 - B", - "derivesFrom": [{ "@id": "#source/13" }], - "characteristics": [], - "factorValues": [ - { - "category": { "@id": "" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - }, - 
{ - "@id": "#sample/29", - "name": "Dragon brain source 1 - C", - "derivesFrom": [{ "@id": "#source/13" }], - "characteristics": [], - "factorValues": [ - { - "category": { "@id": "" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - }, - { - "@id": "#sample/30", - "name": "Dragon brain source 2 - B", - "derivesFrom": [{ "@id": "#source/14" }], - "characteristics": [], - "factorValues": [ - { - "category": { "@id": "" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - }, - { - "@id": "#sample/31", - "name": "Dragon brain source 2 - C", - "derivesFrom": [{ "@id": "#source/14" }], - "characteristics": [], - "factorValues": [ - { - "category": { "@id": "" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - }, - { - "@id": "#sample/32", - "name": "Dragon brain source 3 - B", - "derivesFrom": [{ "@id": "#source/15" }], - "characteristics": [], - "factorValues": [ - { - "category": { "@id": "" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - }, - { - "@id": "#sample/33", - "name": "Dragon brain source 3 - C", - "derivesFrom": [{ "@id": "#source/15" }], - "characteristics": [], - "factorValues": [ - { - "category": { "@id": "" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - } - ] - }, - "protocols": [ - { - "@id": "#protocol/_2", - "name": "sample collection", - "protocolType": { - "annotationValue": "sample collection", - "termAccession": "", - "termSource": "" - 
}, - "description": "", - "uri": "", - "version": "", - "parameters": [], - "components": [ - { - "componentName": "", - "componentType": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - } - } - ] - }, - { - "@id": "#protocol/_9", - "name": "library construction", - "protocolType": { - "annotationValue": "library construction", - "termAccession": "", - "termSource": "" - }, - "description": "", - "uri": "", - "version": "", - "parameters": [ - { - "@id": "#parameter/70", - "parameterName": { - "annotationValue": "library_construction_protocol", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#parameter/72", - "parameterName": { - "annotationValue": "design_description", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#parameter/73", - "parameterName": { - "annotationValue": "library_source", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#parameter/74", - "parameterName": { - "annotationValue": "library_strategy", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#parameter/75", - "parameterName": { - "annotationValue": "library_selection", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#parameter/76", - "parameterName": { - "annotationValue": "library_layout", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#parameter/77", - "parameterName": { - "annotationValue": "insert_size", - "termAccession": "", - "termSource": "" - } - } - ], - "components": [ - { - "componentName": "", - "componentType": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - } - } - ] - }, - { - "@id": "#protocol/_10", - "name": "nucleic acid sequencing", - "protocolType": { - "annotationValue": "nucleic acid sequencing", - "termAccession": "", - "termSource": "" - }, - "description": "", - "uri": "", - "version": "", - "parameters": [ - { - "@id": "#parameter/84", - "parameterName": { - "annotationValue": "platform", - "termAccession": "", - 
"termSource": "" - } - }, - { - "@id": "#parameter/85", - "parameterName": { - "annotationValue": "instrument_model", - "termAccession": "", - "termSource": "" - } - } - ], - "components": [ - { - "componentName": "", - "componentType": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - } - } - ] - } - ], - "processSequence": [ - { - "@id": "#process/sample_collection/16", - "name": "", - "executesProtocol": { "@id": "#protocol/_2" }, - "parameterValues": [], - "performer": "", - "date": "", - "previousProcess": {}, - "nextProcess": {}, - "inputs": [{ "@id": "#source/13" }], - "outputs": [{ "@id": "#sample/16" }] - }, - { - "@id": "#process/sample_collection/17", - "name": "", - "executesProtocol": { "@id": "#protocol/_2" }, - "parameterValues": [], - "performer": "", - "date": "", - "previousProcess": {}, - "nextProcess": {}, - "inputs": [{ "@id": "#source/14" }], - "outputs": [{ "@id": "#sample/17" }] - }, - { - "@id": "#process/sample_collection/18", - "name": "", - "executesProtocol": { "@id": "#protocol/_2" }, - "parameterValues": [], - "performer": "", - "date": "", - "previousProcess": {}, - "nextProcess": {}, - "inputs": [{ "@id": "#source/15" }], - "outputs": [{ "@id": "#sample/18" }] - }, - { - "@id": "#process/sample_collection/19", - "name": "", - "executesProtocol": { "@id": "#protocol/_2" }, - "parameterValues": [], - "performer": "", - "date": "", - "previousProcess": {}, - "nextProcess": {}, - "inputs": [{ "@id": "#source/13" }], - "outputs": [{ "@id": "#sample/19" }] - }, - { - "@id": "#process/sample_collection/20", - "name": "", - "executesProtocol": { "@id": "#protocol/_2" }, - "parameterValues": [], - "performer": "", - "date": "", - "previousProcess": {}, - "nextProcess": {}, - "inputs": [{ "@id": "#source/14" }], - "outputs": [{ "@id": "#sample/20" }] - }, - { - "@id": "#process/sample_collection/21", - "name": "", - "executesProtocol": { "@id": "#protocol/_2" }, - "parameterValues": [], - "performer": "", - "date": "", - 
"previousProcess": {}, - "nextProcess": {}, - "inputs": [{ "@id": "#source/15" }], - "outputs": [{ "@id": "#sample/21" }] - }, - { - "@id": "#process/sample_collection/22", - "name": "", - "executesProtocol": { "@id": "#protocol/_2" }, - "parameterValues": [], - "performer": "", - "date": "", - "previousProcess": {}, - "nextProcess": {}, - "inputs": [{ "@id": "#source/13" }], - "outputs": [{ "@id": "#sample/22" }] - }, - { - "@id": "#process/sample_collection/23", - "name": "", - "executesProtocol": { "@id": "#protocol/_2" }, - "parameterValues": [], - "performer": "", - "date": "", - "previousProcess": {}, - "nextProcess": {}, - "inputs": [{ "@id": "#source/13" }], - "outputs": [{ "@id": "#sample/23" }] - }, - { - "@id": "#process/sample_collection/24", - "name": "", - "executesProtocol": { "@id": "#protocol/_2" }, - "parameterValues": [], - "performer": "", - "date": "", - "previousProcess": {}, - "nextProcess": {}, - "inputs": [{ "@id": "#source/14" }], - "outputs": [{ "@id": "#sample/24" }] - }, - { - "@id": "#process/sample_collection/25", - "name": "", - "executesProtocol": { "@id": "#protocol/_2" }, - "parameterValues": [], - "performer": "", - "date": "", - "previousProcess": {}, - "nextProcess": {}, - "inputs": [{ "@id": "#source/14" }], - "outputs": [{ "@id": "#sample/25" }] - }, - { - "@id": "#process/sample_collection/26", - "name": "", - "executesProtocol": { "@id": "#protocol/_2" }, - "parameterValues": [], - "performer": "", - "date": "", - "previousProcess": {}, - "nextProcess": {}, - "inputs": [{ "@id": "#source/15" }], - "outputs": [{ "@id": "#sample/26" }] - }, - { - "@id": "#process/sample_collection/27", - "name": "", - "executesProtocol": { "@id": "#protocol/_2" }, - "parameterValues": [], - "performer": "", - "date": "", - "previousProcess": {}, - "nextProcess": {}, - "inputs": [{ "@id": "#source/15" }], - "outputs": [{ "@id": "#sample/27" }] - }, - { - "@id": "#process/sample_collection/28", - "name": "", - "executesProtocol": { "@id": 
"#protocol/_2" }, - "parameterValues": [], - "performer": "", - "date": "", - "previousProcess": {}, - "nextProcess": {}, - "inputs": [{ "@id": "#source/13" }], - "outputs": [{ "@id": "#sample/28" }] - }, - { - "@id": "#process/sample_collection/29", - "name": "", - "executesProtocol": { "@id": "#protocol/_2" }, - "parameterValues": [], - "performer": "", - "date": "", - "previousProcess": {}, - "nextProcess": {}, - "inputs": [{ "@id": "#source/13" }], - "outputs": [{ "@id": "#sample/29" }] - }, - { - "@id": "#process/sample_collection/30", - "name": "", - "executesProtocol": { "@id": "#protocol/_2" }, - "parameterValues": [], - "performer": "", - "date": "", - "previousProcess": {}, - "nextProcess": {}, - "inputs": [{ "@id": "#source/14" }], - "outputs": [{ "@id": "#sample/30" }] - }, - { - "@id": "#process/sample_collection/31", - "name": "", - "executesProtocol": { "@id": "#protocol/_2" }, - "parameterValues": [], - "performer": "", - "date": "", - "previousProcess": {}, - "nextProcess": {}, - "inputs": [{ "@id": "#source/14" }], - "outputs": [{ "@id": "#sample/31" }] - }, - { - "@id": "#process/sample_collection/32", - "name": "", - "executesProtocol": { "@id": "#protocol/_2" }, - "parameterValues": [], - "performer": "", - "date": "", - "previousProcess": {}, - "nextProcess": {}, - "inputs": [{ "@id": "#source/15" }], - "outputs": [{ "@id": "#sample/32" }] - }, - { - "@id": "#process/sample_collection/33", - "name": "", - "executesProtocol": { "@id": "#protocol/_2" }, - "parameterValues": [], - "performer": "", - "date": "", - "previousProcess": {}, - "nextProcess": {}, - "inputs": [{ "@id": "#source/15" }], - "outputs": [{ "@id": "#sample/33" }] - } - ], - "assays": [ - { - "@id": "#assay/9_10", - "filename": "a_assays.txt", - "measurementType": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "technologyType": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "technologyPlatform": "", - 
"characteristicCategories": [ - { - "@id": "#characteristic_category/title_71", - "characteristicType": { - "annotationValue": "title", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#characteristic_category/submission_date_79", - "characteristicType": { - "annotationValue": "submission date", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#characteristic_category/status_80", - "characteristicType": { - "annotationValue": "status", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#characteristic_category/accession_81", - "characteristicType": { - "annotationValue": "accession", - "termAccession": "", - "termSource": "" - } - } - ], - "materials": { - "samples": [], - "otherMaterials": [ - { - "@id": "#other_material/34", - "name": "Lib Heart 1 - A", - "type": "library_name", - "characteristics": [ - { - "category": { "@id": "#characteristic_category/title_71" }, - "value": { - "annotationValue": "Lib Title Heart 1 - A", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/submission_date_79" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#characteristic_category/status_80" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/accession_81" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "derivesFrom": [{ "@id": "#sample/16" }] - }, - { - "@id": "#other_material/35", - "name": "Lib Heart 1 - B", - "type": "library_name", - "characteristics": [ - 
{ - "category": { "@id": "#characteristic_category/title_71" }, - "value": { - "annotationValue": "Lib Title Heart 1 - B", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/submission_date_79" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#characteristic_category/status_80" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/accession_81" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "derivesFrom": [{ "@id": "#sample/23" }] - }, - { - "@id": "#other_material/36", - "name": "Lib Heart 1 - C", - "type": "library_name", - "characteristics": [ - { - "category": { "@id": "#characteristic_category/title_71" }, - "value": { - "annotationValue": "Lib Title Heart 1 - C", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/submission_date_79" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#characteristic_category/status_80" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/accession_81" - }, - "value": { - "annotationValue": "", - "termSource": "", 
- "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "derivesFrom": [{ "@id": "#sample/22" }] - }, - { - "@id": "#other_material/37", - "name": "Lib Heart 2 - A", - "type": "library_name", - "characteristics": [ - { - "category": { "@id": "#characteristic_category/title_71" }, - "value": { - "annotationValue": "Lib Title Heart 2 - A", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/submission_date_79" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#characteristic_category/status_80" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/accession_81" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "derivesFrom": [{ "@id": "#sample/17" }] - }, - { - "@id": "#other_material/38", - "name": "Lib Heart 2 - B", - "type": "library_name", - "characteristics": [ - { - "category": { "@id": "#characteristic_category/title_71" }, - "value": { - "annotationValue": "Lib Title Heart 2 - B", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/submission_date_79" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#characteristic_category/status_80" }, - "value": { - 
"annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/accession_81" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "derivesFrom": [{ "@id": "#sample/24" }] - }, - { - "@id": "#other_material/39", - "name": "Lib Heart 2 - C", - "type": "library_name", - "characteristics": [ - { - "category": { "@id": "#characteristic_category/title_71" }, - "value": { - "annotationValue": "Lib Title Heart 2 - C", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/submission_date_79" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#characteristic_category/status_80" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/accession_81" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "derivesFrom": [{ "@id": "#sample/25" }] - }, - { - "@id": "#other_material/40", - "name": "Lib Heart 3 - A", - "type": "library_name", - "characteristics": [ - { - "category": { "@id": "#characteristic_category/title_71" }, - "value": { - "annotationValue": "Lib Title Heart 3 - A", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": 
"#characteristic_category/submission_date_79" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#characteristic_category/status_80" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/accession_81" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "derivesFrom": [{ "@id": "#sample/18" }] - }, - { - "@id": "#other_material/41", - "name": "Lib Heart 3 - B", - "type": "library_name", - "characteristics": [ - { - "category": { "@id": "#characteristic_category/title_71" }, - "value": { - "annotationValue": "Lib Title Heart 3 - B", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/submission_date_79" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#characteristic_category/status_80" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/accession_81" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "derivesFrom": [{ "@id": "#sample/26" }] - }, - { - "@id": "#other_material/42", - "name": "Lib Heart 3 - C", - "type": "library_name", - "characteristics": [ - { - 
"category": { "@id": "#characteristic_category/title_71" }, - "value": { - "annotationValue": "Lib Title Heart 3 - C", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/submission_date_79" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#characteristic_category/status_80" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/accession_81" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "derivesFrom": [{ "@id": "#sample/27" }] - }, - { - "@id": "#other_material/43", - "name": "Lib Brain 1 - A", - "type": "library_name", - "characteristics": [ - { - "category": { "@id": "#characteristic_category/title_71" }, - "value": { - "annotationValue": "Lib Title Brain 1 - A", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/submission_date_79" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#characteristic_category/status_80" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/accession_81" - }, - "value": { - "annotationValue": "", - "termSource": "", - 
"termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "derivesFrom": [{ "@id": "#sample/19" }] - }, - { - "@id": "#other_material/44", - "name": "Lib Brain 1 - B", - "type": "library_name", - "characteristics": [ - { - "category": { "@id": "#characteristic_category/title_71" }, - "value": { - "annotationValue": "Lib Title Brain 1 - B", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/submission_date_79" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#characteristic_category/status_80" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/accession_81" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "derivesFrom": [{ "@id": "#sample/28" }] - }, - { - "@id": "#other_material/45", - "name": "Lib Brain 1 - C", - "type": "library_name", - "characteristics": [ - { - "category": { "@id": "#characteristic_category/title_71" }, - "value": { - "annotationValue": "Lib Title Brain 1 - C", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/submission_date_79" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#characteristic_category/status_80" }, - "value": { - 
"annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/accession_81" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "derivesFrom": [{ "@id": "#sample/29" }] - }, - { - "@id": "#other_material/46", - "name": "Lib Brain 2 - A", - "type": "library_name", - "characteristics": [ - { - "category": { "@id": "#characteristic_category/title_71" }, - "value": { - "annotationValue": "Lib Title Brain 2 - A", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/submission_date_79" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#characteristic_category/status_80" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/accession_81" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "derivesFrom": [{ "@id": "#sample/20" }] - }, - { - "@id": "#other_material/47", - "name": "Lib Brain 2 - B", - "type": "library_name", - "characteristics": [ - { - "category": { "@id": "#characteristic_category/title_71" }, - "value": { - "annotationValue": "Lib Title Brain 2 - B", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": 
"#characteristic_category/submission_date_79" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#characteristic_category/status_80" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/accession_81" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "derivesFrom": [{ "@id": "#sample/30" }] - }, - { - "@id": "#other_material/48", - "name": "Lib Brain 2 - C", - "type": "library_name", - "characteristics": [ - { - "category": { "@id": "#characteristic_category/title_71" }, - "value": { - "annotationValue": "Lib Title Brain 2 - C", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/submission_date_79" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#characteristic_category/status_80" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/accession_81" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "derivesFrom": [{ "@id": "#sample/31" }] - }, - { - "@id": "#other_material/49", - "name": "Lib Brain 3 - A", - "type": "library_name", - "characteristics": [ - { - 
"category": { "@id": "#characteristic_category/title_71" }, - "value": { - "annotationValue": "Lib Title Brain 3 - A", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/submission_date_79" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#characteristic_category/status_80" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/accession_81" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "derivesFrom": [{ "@id": "#sample/21" }] - }, - { - "@id": "#other_material/50", - "name": "Lib Brain 3 - B", - "type": "library_name", - "characteristics": [ - { - "category": { "@id": "#characteristic_category/title_71" }, - "value": { - "annotationValue": "Lib Title Brain 3 - B", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/submission_date_79" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#characteristic_category/status_80" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/accession_81" - }, - "value": { - "annotationValue": "", - "termSource": "", - 
"termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "derivesFrom": [{ "@id": "#sample/32" }] - }, - { - "@id": "#other_material/51", - "name": "Lib Brain 3 - C", - "type": "library_name", - "characteristics": [ - { - "category": { "@id": "#characteristic_category/title_71" }, - "value": { - "annotationValue": "Lib Title Brain 3 - C", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/submission_date_79" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#characteristic_category/status_80" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/accession_81" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "derivesFrom": [{ "@id": "#sample/33" }] - } - ] - }, - "processSequence": [ - { - "@id": "#process/library_construction/34", - "name": "", - "executesProtocol": { "@id": "#protocol/_9" }, - "parameterValues": [ - { - "category": { "@id": "#parameter/70" }, - "value": { - "annotationValue": "My super awesome protocol", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/72" }, - "value": { - "annotationValue": "Lib description Heart 1 - A", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/73" }, - "value": { - 
"annotationValue": "GENOMIC", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/74" }, - "value": { - "annotationValue": "WGS", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/75" }, - "value": { - "annotationValue": "PCR", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/76" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/77" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { "@id": "#process/sample_collection/34" }, - "nextProcess": { "@id": "#process/nucleic_acid_sequencing/34" }, - "inputs": [{ "@id": "#sample/16" }], - "outputs": [{ "@id": "#sample/34" }] - }, - { - "@id": "#process/library_construction/35", - "name": "", - "executesProtocol": { "@id": "#protocol/_9" }, - "parameterValues": [ - { - "category": { "@id": "#parameter/70" }, - "value": { - "annotationValue": "My super awesome protocol", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/72" }, - "value": { - "annotationValue": "Lib description Heart 1 - B", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/73" }, - "value": { - "annotationValue": "GENOMIC", - "termSource": "", - 
"termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/74" }, - "value": { - "annotationValue": "WGS", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/75" }, - "value": { - "annotationValue": "PCR", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/76" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/77" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { "@id": "#process/sample_collection/35" }, - "nextProcess": { "@id": "#process/nucleic_acid_sequencing/35" }, - "inputs": [{ "@id": "#sample/23" }], - "outputs": [{ "@id": "#sample/35" }] - }, - { - "@id": "#process/library_construction/36", - "name": "", - "executesProtocol": { "@id": "#protocol/_9" }, - "parameterValues": [ - { - "category": { "@id": "#parameter/70" }, - "value": { - "annotationValue": "My super awesome protocol", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/72" }, - "value": { - "annotationValue": "Lib description Heart 1 - C", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/73" }, - "value": { - "annotationValue": "GENOMIC", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - 
"termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/74" }, - "value": { - "annotationValue": "WGS", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/75" }, - "value": { - "annotationValue": "PCR", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/76" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/77" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { "@id": "#process/sample_collection/36" }, - "nextProcess": { "@id": "#process/nucleic_acid_sequencing/36" }, - "inputs": [{ "@id": "#sample/22" }], - "outputs": [{ "@id": "#sample/36" }] - }, - { - "@id": "#process/library_construction/37", - "name": "", - "executesProtocol": { "@id": "#protocol/_9" }, - "parameterValues": [ - { - "category": { "@id": "#parameter/70" }, - "value": { - "annotationValue": "My super awesome protocol", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/72" }, - "value": { - "annotationValue": "Lib description Heart 2 - A", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/73" }, - "value": { - "annotationValue": "GENOMIC", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - 
"category": { "@id": "#parameter/74" }, - "value": { - "annotationValue": "WGS", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/75" }, - "value": { - "annotationValue": "PCR", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/76" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/77" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { "@id": "#process/sample_collection/37" }, - "nextProcess": { "@id": "#process/nucleic_acid_sequencing/37" }, - "inputs": [{ "@id": "#sample/17" }], - "outputs": [{ "@id": "#sample/37" }] - }, - { - "@id": "#process/library_construction/38", - "name": "", - "executesProtocol": { "@id": "#protocol/_9" }, - "parameterValues": [ - { - "category": { "@id": "#parameter/70" }, - "value": { - "annotationValue": "My super awesome protocol", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/72" }, - "value": { - "annotationValue": "Lib description Heart 2 - B", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/73" }, - "value": { - "annotationValue": "GENOMIC", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/74" }, - "value": { - 
"annotationValue": "WGS", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/75" }, - "value": { - "annotationValue": "PCR", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/76" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/77" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { "@id": "#process/sample_collection/38" }, - "nextProcess": { "@id": "#process/nucleic_acid_sequencing/38" }, - "inputs": [{ "@id": "#sample/24" }], - "outputs": [{ "@id": "#sample/38" }] - }, - { - "@id": "#process/library_construction/39", - "name": "", - "executesProtocol": { "@id": "#protocol/_9" }, - "parameterValues": [ - { - "category": { "@id": "#parameter/70" }, - "value": { - "annotationValue": "My super awesome protocol", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/72" }, - "value": { - "annotationValue": "Lib description Heart 2 - C", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/73" }, - "value": { - "annotationValue": "GENOMIC", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/74" }, - "value": { - "annotationValue": "WGS", - "termSource": "", - 
"termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/75" }, - "value": { - "annotationValue": "PCR", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/76" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/77" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { "@id": "#process/sample_collection/39" }, - "nextProcess": { "@id": "#process/nucleic_acid_sequencing/39" }, - "inputs": [{ "@id": "#sample/25" }], - "outputs": [{ "@id": "#sample/39" }] - }, - { - "@id": "#process/library_construction/40", - "name": "", - "executesProtocol": { "@id": "#protocol/_9" }, - "parameterValues": [ - { - "category": { "@id": "#parameter/70" }, - "value": { - "annotationValue": "My super awesome protocol", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/72" }, - "value": { - "annotationValue": "Lib description Heart 3 - A", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/73" }, - "value": { - "annotationValue": "GENOMIC", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/74" }, - "value": { - "annotationValue": "WGS", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - 
"termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/75" }, - "value": { - "annotationValue": "PCR", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/76" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/77" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { "@id": "#process/sample_collection/40" }, - "nextProcess": { "@id": "#process/nucleic_acid_sequencing/40" }, - "inputs": [{ "@id": "#sample/18" }], - "outputs": [{ "@id": "#sample/40" }] - }, - { - "@id": "#process/library_construction/41", - "name": "", - "executesProtocol": { "@id": "#protocol/_9" }, - "parameterValues": [ - { - "category": { "@id": "#parameter/70" }, - "value": { - "annotationValue": "My super awesome protocol", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/72" }, - "value": { - "annotationValue": "Lib description Heart 3 - B", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/73" }, - "value": { - "annotationValue": "GENOMIC", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/74" }, - "value": { - "annotationValue": "WGS", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - 
"category": { "@id": "#parameter/75" }, - "value": { - "annotationValue": "PCR", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/76" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/77" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { "@id": "#process/sample_collection/41" }, - "nextProcess": { "@id": "#process/nucleic_acid_sequencing/41" }, - "inputs": [{ "@id": "#sample/26" }], - "outputs": [{ "@id": "#sample/41" }] - }, - { - "@id": "#process/library_construction/42", - "name": "", - "executesProtocol": { "@id": "#protocol/_9" }, - "parameterValues": [ - { - "category": { "@id": "#parameter/70" }, - "value": { - "annotationValue": "My super awesome protocol", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/72" }, - "value": { - "annotationValue": "Lib description Heart 3 - C", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/73" }, - "value": { - "annotationValue": "GENOMIC", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/74" }, - "value": { - "annotationValue": "WGS", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/75" }, - "value": { - 
"annotationValue": "PCR", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/76" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/77" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { "@id": "#process/sample_collection/42" }, - "nextProcess": { "@id": "#process/nucleic_acid_sequencing/42" }, - "inputs": [{ "@id": "#sample/27" }], - "outputs": [{ "@id": "#sample/42" }] - }, - { - "@id": "#process/library_construction/43", - "name": "", - "executesProtocol": { "@id": "#protocol/_9" }, - "parameterValues": [ - { - "category": { "@id": "#parameter/70" }, - "value": { - "annotationValue": "My super awesome protocol", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/72" }, - "value": { - "annotationValue": "Lib description Brain 1 - A", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/73" }, - "value": { - "annotationValue": "GENOMIC", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/74" }, - "value": { - "annotationValue": "WGS", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/75" }, - "value": { - "annotationValue": "PCR", - "termSource": "", - 
"termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/76" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/77" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { "@id": "#process/sample_collection/43" }, - "nextProcess": { "@id": "#process/nucleic_acid_sequencing/43" }, - "inputs": [{ "@id": "#sample/19" }], - "outputs": [{ "@id": "#sample/43" }] - }, - { - "@id": "#process/library_construction/44", - "name": "", - "executesProtocol": { "@id": "#protocol/_9" }, - "parameterValues": [ - { - "category": { "@id": "#parameter/70" }, - "value": { - "annotationValue": "My super awesome protocol", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/72" }, - "value": { - "annotationValue": "Lib description Brain 1 - B", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/73" }, - "value": { - "annotationValue": "GENOMIC", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/74" }, - "value": { - "annotationValue": "WGS", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/75" }, - "value": { - "annotationValue": "PCR", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - 
"termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/76" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/77" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { "@id": "#process/sample_collection/44" }, - "nextProcess": { "@id": "#process/nucleic_acid_sequencing/44" }, - "inputs": [{ "@id": "#sample/28" }], - "outputs": [{ "@id": "#sample/44" }] - }, - { - "@id": "#process/library_construction/45", - "name": "", - "executesProtocol": { "@id": "#protocol/_9" }, - "parameterValues": [ - { - "category": { "@id": "#parameter/70" }, - "value": { - "annotationValue": "My super awesome protocol", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/72" }, - "value": { - "annotationValue": "Lib description Brain 1 - C", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/73" }, - "value": { - "annotationValue": "GENOMIC", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/74" }, - "value": { - "annotationValue": "WGS", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/75" }, - "value": { - "annotationValue": "PCR", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - 
"category": { "@id": "#parameter/76" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/77" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { "@id": "#process/sample_collection/45" }, - "nextProcess": { "@id": "#process/nucleic_acid_sequencing/45" }, - "inputs": [{ "@id": "#sample/29" }], - "outputs": [{ "@id": "#sample/45" }] - }, - { - "@id": "#process/library_construction/46", - "name": "", - "executesProtocol": { "@id": "#protocol/_9" }, - "parameterValues": [ - { - "category": { "@id": "#parameter/70" }, - "value": { - "annotationValue": "My super awesome protocol", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/72" }, - "value": { - "annotationValue": "Lib description Brain 2 - A", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/73" }, - "value": { - "annotationValue": "GENOMIC", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/74" }, - "value": { - "annotationValue": "WGS", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/75" }, - "value": { - "annotationValue": "PCR", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/76" }, - "value": { - 
"annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/77" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { "@id": "#process/sample_collection/46" }, - "nextProcess": { "@id": "#process/nucleic_acid_sequencing/46" }, - "inputs": [{ "@id": "#sample/20" }], - "outputs": [{ "@id": "#sample/46" }] - }, - { - "@id": "#process/library_construction/47", - "name": "", - "executesProtocol": { "@id": "#protocol/_9" }, - "parameterValues": [ - { - "category": { "@id": "#parameter/70" }, - "value": { - "annotationValue": "My super awesome protocol", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/72" }, - "value": { - "annotationValue": "Lib description Brain 2 - B", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/73" }, - "value": { - "annotationValue": "GENOMIC", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/74" }, - "value": { - "annotationValue": "WGS", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/75" }, - "value": { - "annotationValue": "PCR", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/76" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": 
"" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/77" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { "@id": "#process/sample_collection/47" }, - "nextProcess": { "@id": "#process/nucleic_acid_sequencing/47" }, - "inputs": [{ "@id": "#sample/30" }], - "outputs": [{ "@id": "#sample/47" }] - }, - { - "@id": "#process/library_construction/48", - "name": "", - "executesProtocol": { "@id": "#protocol/_9" }, - "parameterValues": [ - { - "category": { "@id": "#parameter/70" }, - "value": { - "annotationValue": "My super awesome protocol", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/72" }, - "value": { - "annotationValue": "Lib description Brain 2 - C", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/73" }, - "value": { - "annotationValue": "GENOMIC", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/74" }, - "value": { - "annotationValue": "WGS", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/75" }, - "value": { - "annotationValue": "PCR", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/76" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", 
- "comments": [] - } - }, - { - "category": { "@id": "#parameter/77" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { "@id": "#process/sample_collection/48" }, - "nextProcess": { "@id": "#process/nucleic_acid_sequencing/48" }, - "inputs": [{ "@id": "#sample/31" }], - "outputs": [{ "@id": "#sample/48" }] - }, - { - "@id": "#process/library_construction/49", - "name": "", - "executesProtocol": { "@id": "#protocol/_9" }, - "parameterValues": [ - { - "category": { "@id": "#parameter/70" }, - "value": { - "annotationValue": "My super awesome protocol", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/72" }, - "value": { - "annotationValue": "Lib description Brain 3 - A", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/73" }, - "value": { - "annotationValue": "GENOMIC", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/74" }, - "value": { - "annotationValue": "WGS", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/75" }, - "value": { - "annotationValue": "PCR", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/76" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": 
"#parameter/77" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { "@id": "#process/sample_collection/49" }, - "nextProcess": { "@id": "#process/nucleic_acid_sequencing/49" }, - "inputs": [{ "@id": "#sample/21" }], - "outputs": [{ "@id": "#sample/49" }] - }, - { - "@id": "#process/library_construction/50", - "name": "", - "executesProtocol": { "@id": "#protocol/_9" }, - "parameterValues": [ - { - "category": { "@id": "#parameter/70" }, - "value": { - "annotationValue": "My super awesome protocol", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/72" }, - "value": { - "annotationValue": "Lib description Brain 3 - B", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/73" }, - "value": { - "annotationValue": "GENOMIC", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/74" }, - "value": { - "annotationValue": "WGS", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/75" }, - "value": { - "annotationValue": "PCR", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/76" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/77" }, - "value": { - "annotationValue": "", - 
"termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { "@id": "#process/sample_collection/50" }, - "nextProcess": { "@id": "#process/nucleic_acid_sequencing/50" }, - "inputs": [{ "@id": "#sample/32" }], - "outputs": [{ "@id": "#sample/50" }] - }, - { - "@id": "#process/library_construction/51", - "name": "", - "executesProtocol": { "@id": "#protocol/_9" }, - "parameterValues": [ - { - "category": { "@id": "#parameter/70" }, - "value": { - "annotationValue": "My super awesome protocol", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/72" }, - "value": { - "annotationValue": "Lib description Brain 3 - C", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/73" }, - "value": { - "annotationValue": "GENOMIC", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/74" }, - "value": { - "annotationValue": "WGS", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/75" }, - "value": { - "annotationValue": "PCR", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/76" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/77" }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - 
"termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { "@id": "#process/sample_collection/51" }, - "nextProcess": { "@id": "#process/nucleic_acid_sequencing/51" }, - "inputs": [{ "@id": "#sample/33" }], - "outputs": [{ "@id": "#sample/51" }] - }, - { - "@id": "#process/nucleic_acid_sequencing/52", - "name": "", - "executesProtocol": { "@id": "#protocol/_10" }, - "parameterValues": [ - { - "category": { "@id": "#parameter/84" }, - "value": { - "annotationValue": "Illumina", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/85" }, - "value": { - "annotationValue": "Illumina NovaSeq 6000", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { "@id": "#process/library_construction/52" }, - "nextProcess": {}, - "inputs": [ - { "@id": "#other_material/34" }, - { "@id": "#other_material/35" }, - { "@id": "#other_material/36" } - ], - "outputs": [{ "@id": "#sample/52" }] - }, - { - "@id": "#process/nucleic_acid_sequencing/53", - "name": "", - "executesProtocol": { "@id": "#protocol/_10" }, - "parameterValues": [ - { - "category": { "@id": "#parameter/84" }, - "value": { - "annotationValue": "Illumina", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/85" }, - "value": { - "annotationValue": "Illumina NovaSeq 6000", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { "@id": "#process/library_construction/53" }, - "nextProcess": {}, - "inputs": [ - { "@id": "#other_material/37" }, - { "@id": 
"#other_material/38" }, - { "@id": "#other_material/39" } - ], - "outputs": [{ "@id": "#sample/53" }] - }, - { - "@id": "#process/nucleic_acid_sequencing/54", - "name": "", - "executesProtocol": { "@id": "#protocol/_10" }, - "parameterValues": [ - { - "category": { "@id": "#parameter/84" }, - "value": { - "annotationValue": "Illumina", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/85" }, - "value": { - "annotationValue": "Illumina NovaSeq 6000", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { "@id": "#process/library_construction/54" }, - "nextProcess": {}, - "inputs": [ - { "@id": "#other_material/40" }, - { "@id": "#other_material/41" }, - { "@id": "#other_material/42" } - ], - "outputs": [{ "@id": "#sample/54" }] - }, - { - "@id": "#process/nucleic_acid_sequencing/55", - "name": "", - "executesProtocol": { "@id": "#protocol/_10" }, - "parameterValues": [ - { - "category": { "@id": "#parameter/84" }, - "value": { - "annotationValue": "Illumina", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/85" }, - "value": { - "annotationValue": "Illumina NovaSeq 6000", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { "@id": "#process/library_construction/55" }, - "nextProcess": {}, - "inputs": [ - { "@id": "#other_material/43" }, - { "@id": "#other_material/44" }, - { "@id": "#other_material/45" } - ], - "outputs": [{ "@id": "#sample/55" }] - }, - { - "@id": "#process/nucleic_acid_sequencing/56", - "name": "", - "executesProtocol": { "@id": "#protocol/_10" }, - 
"parameterValues": [ - { - "category": { "@id": "#parameter/84" }, - "value": { - "annotationValue": "Illumina", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/85" }, - "value": { - "annotationValue": "Illumina NovaSeq 6000", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { "@id": "#process/library_construction/56" }, - "nextProcess": {}, - "inputs": [ - { "@id": "#other_material/46" }, - { "@id": "#other_material/47" }, - { "@id": "#other_material/48" } - ], - "outputs": [{ "@id": "#sample/56" }] - }, - { - "@id": "#process/nucleic_acid_sequencing/57", - "name": "", - "executesProtocol": { "@id": "#protocol/_10" }, - "parameterValues": [ - { - "category": { "@id": "#parameter/84" }, - "value": { - "annotationValue": "Illumina", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { "@id": "#parameter/85" }, - "value": { - "annotationValue": "Illumina NovaSeq 6000", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { "@id": "#process/library_construction/57" }, - "nextProcess": {}, - "inputs": [ - { "@id": "#other_material/49" }, - { "@id": "#other_material/50" }, - { "@id": "#other_material/51" } - ], - "outputs": [{ "@id": "#sample/57" }] - } - ], - "dataFiles": [ - { - "@id": "#data_file/52", - "name": "Datafile Dragon Heart 1.fastq", - "type": "Raw Data File", - "comments": [ - { "name": "file_type", "value": "fastq" }, - { "name": "file checksum", "value": "" }, - { "name": "submission date", "value": "" }, - { "name": "status", "value": "" }, - { "name": "accession", "value": "" } 
- ] - }, - { - "@id": "#data_file/53", - "name": "Datafile Dragon Heart 2.fastq", - "type": "Raw Data File", - "comments": [ - { "name": "file_type", "value": "fastq" }, - { "name": "file checksum", "value": "" }, - { "name": "submission date", "value": "" }, - { "name": "status", "value": "" }, - { "name": "accession", "value": "" } - ] - }, - { - "@id": "#data_file/54", - "name": "Datafile Dragon Heart 3.fastq", - "type": "Raw Data File", - "comments": [ - { "name": "file_type", "value": "fastq" }, - { "name": "file checksum", "value": "" }, - { "name": "submission date", "value": "" }, - { "name": "status", "value": "" }, - { "name": "accession", "value": "" } - ] - }, - { - "@id": "#data_file/55", - "name": "Datafile Dragon Brain 1.fastq", - "type": "Raw Data File", - "comments": [ - { "name": "file_type", "value": "fastq" }, - { "name": "file checksum", "value": "" }, - { "name": "submission date", "value": "" }, - { "name": "status", "value": "" }, - { "name": "accession", "value": "" } - ] - }, - { - "@id": "#data_file/56", - "name": "Datafile Dragon Brain 2.fastq", - "type": "Raw Data File", - "comments": [ - { "name": "file_type", "value": "fastq" }, - { "name": "file checksum", "value": "" }, - { "name": "submission date", "value": "" }, - { "name": "status", "value": "" }, - { "name": "accession", "value": "" } - ] - }, - { - "@id": "#data_file/57", - "name": "Datafile Dragon Brain 3.fastq", - "type": "Raw Data File", - "comments": [ - { "name": "file_type", "value": "fastq" }, - { "name": "file checksum", "value": "" }, - { "name": "submission date", "value": "" }, - { "name": "status", "value": "" }, - { "name": "accession", "value": "" } - ] - } - ], - "unitCategories": [] - } - ], - "factors": [], - "unitCategories": [] - } - ] -} From 8838c9ee97ce0a4e3f8306c54909e43de86c8aa9 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Tue, 26 Sep 2023 14:00:01 +0200 Subject: [PATCH 08/62] Add additional objects --- 
read-isa-json/ena_objects/characteristic.py | 97 +++++++++++++++++++ read-isa-json/ena_objects/ena_sample.py | 8 +- read-isa-json/ena_objects/ena_std_lib.py | 5 +- read-isa-json/ena_objects/other_material.py | 37 +++++++ .../other_material_characteristic.py | 14 +++ read-isa-json/ena_objects/parameter_value.py | 15 +++ 6 files changed, 170 insertions(+), 6 deletions(-) create mode 100644 read-isa-json/ena_objects/characteristic.py create mode 100644 read-isa-json/ena_objects/other_material.py create mode 100644 read-isa-json/ena_objects/other_material_characteristic.py create mode 100644 read-isa-json/ena_objects/parameter_value.py diff --git a/read-isa-json/ena_objects/characteristic.py b/read-isa-json/ena_objects/characteristic.py new file mode 100644 index 0000000..e6890b4 --- /dev/null +++ b/read-isa-json/ena_objects/characteristic.py @@ -0,0 +1,97 @@ +from typing import List, Dict +from ena_objects.ena_std_lib import validate_dict + + +class IsaBase: + """ + This is the base class + """ + + def check_dict_keys(self, dict: Dict, mandatory_keys): + [validate_dict(dict=dict, key=key) for key in mandatory_keys] + + +class Category(IsaBase): + """ + This represents a category object in a Characteristic + """ + + def __init__(self, id: str) -> None: + self.id = id + + mandatory_keys = ["id"] + + def from_dict(self, dict: Dict): + super().check_dict_keys(dict, self.mandatory_keys) + + return Category(id=dict["id"]) + + +class Value(IsaBase): + """ + This represents a Value object in a Characteristic + """ + + mandatory_keys = ["annotation_value", "term_accession", "term_source"] + + def __init__( + self, annotation_value: str, term_source: str = "", term_accession: str = "" + ) -> None: + self.annotation_value = annotation_value + self.term_source = term_source + self.term_accession = term_accession + + def from_dict(self, dict: Dict): + super().check_dict_keys(dict, self.mandatory_keys) + + return Unit( + annotation_value=dict["annotation_value"], + 
term_accession=dict["term_accession"], + term_source=dict["term_source"], + ) + + +class Unit(IsaBase): + """ + This represents the Unit object in a Characteristic + """ + + mandatory_keys = ["tern_source", "term_accession", "comments"] + + def __init__( + self, term_source: str, term_accession: str, comments: List[any] + ) -> None: + self.term_source = term_source + self.term_accession = term_accession + self.comments = comments + + def from_dict(self, dict: Dict): + super().check_dict_keys(dict, self.mandatory_keys) + + return Unit( + term_source=dict["term_source"], + term_accession=dict["term_accession"], + comments=dict["comments"], + ) + + +class Characteristic(IsaBase): + """ + This is the base class of a characteristics object. + """ + + mandatory_keys = ["category", "value", "unit"] + + def __init__(self, category: Category, value: Value, unit: Unit) -> None: + self.category = category + self.value = value + self.unit = unit + + def from_dict(self, dict: Dict): + super().check_dict_keys(dict, self.mandatory_keys) + + return Unit( + category=Category.from_dict(dict["category"]), + value=Value.from_dict(dict["value"]), + unit=Unit.from_dict(dict["unit"]), + ) diff --git a/read-isa-json/ena_objects/ena_sample.py b/read-isa-json/ena_objects/ena_sample.py index 09237db..2e40ef8 100644 --- a/read-isa-json/ena_objects/ena_sample.py +++ b/read-isa-json/ena_objects/ena_sample.py @@ -8,10 +8,10 @@ class EnaSample: def __init__( self, - alias: str, - status: str, - taxon_id: int, - sample_description: str, + alias: str = "Sample alias", + status: str = "sample status", + taxon_id: int = "sample_taon_id", + sample_description: str = "sample_description", ) -> None: self.alias = alias self.status = status diff --git a/read-isa-json/ena_objects/ena_std_lib.py b/read-isa-json/ena_objects/ena_std_lib.py index aa2ce0d..970ade7 100644 --- a/read-isa-json/ena_objects/ena_std_lib.py +++ b/read-isa-json/ena_objects/ena_std_lib.py @@ -1,3 +1,4 @@ +from ast import List, ClassDef 
from typing import Dict @@ -25,7 +26,7 @@ def filter_attribute_by(attribute_list: str, key: str, value: str) -> Dict: return [attribute for attribute in attribute_list if attribute[key] == value] -def validate_isa_json(isa_json: Dict, key: str) -> None: +def validate_dict(dict: Dict, key: str) -> None: """Raises an error if the structure of the ISA JSON Dict is not conform Args: @@ -35,5 +36,5 @@ def validate_isa_json(isa_json: Dict, key: str) -> None: Raises: KeyError: Will display the missing key in the Dict """ - if not key in isa_json.keys(): + if key not in dict.keys(): raise KeyError(f"{key} was not found in the provided ISA JSON.") diff --git a/read-isa-json/ena_objects/other_material.py b/read-isa-json/ena_objects/other_material.py new file mode 100644 index 0000000..223a43c --- /dev/null +++ b/read-isa-json/ena_objects/other_material.py @@ -0,0 +1,37 @@ +from ena_objects.ena_std_lib import validate_dict +from ena_objects.parameter_value import ParameterValue +from ena_objects.other_material_characteristic import OtherMaterialCharacteristic +from ena_objects.characteristic import IsaBase + +from typing import List, Dict + + +class OtherMaterial(IsaBase): + """ + docstring + """ + + mandatory_keys = ["id", "name", "type", "other_material_characteristics"] + + def __init__( + self, + id: int, + name: str, + type: str, + other_material_characteristics: OtherMaterialCharacteristic, + ) -> None: + self.id = id + self.name = name + self.type = type + self.other_material_characteristics = other_material_characteristics + + def from_dict(self, dict): + super().check_dict_keys(dict, self.mandatory_keys) + return OtherMaterial( + id=dict["id"], + name=dict["name"], + type=dict["type"], + other_material_characteristics=OtherMaterialCharacteristic.from_dict( + dict["other_material_characteristics"] + ), + ) diff --git a/read-isa-json/ena_objects/other_material_characteristic.py b/read-isa-json/ena_objects/other_material_characteristic.py new file mode 100644 index 
0000000..55c411b --- /dev/null +++ b/read-isa-json/ena_objects/other_material_characteristic.py @@ -0,0 +1,14 @@ +from typing import Dict +from ena_objects.characteristic import Category, Characteristic, Unit, Value + + +class OtherMaterialCharacteristic(Characteristic): + """ + This class represents the other material object. + """ + + def __init__(self, category: Category, value: Value, unit: Unit) -> None: + super().__init__(category, value, unit) + + def from_dict(self, dict: Dict): + return super().from_dict(dict) diff --git a/read-isa-json/ena_objects/parameter_value.py b/read-isa-json/ena_objects/parameter_value.py new file mode 100644 index 0000000..3f0f99c --- /dev/null +++ b/read-isa-json/ena_objects/parameter_value.py @@ -0,0 +1,15 @@ +from typing import Dict +from ena_objects.characteristic import Category, Characteristic, Unit, Value + + +class ParameterValue(Characteristic): + """ + This class represents a paramenter value in the isa study + and is inherited from the Characteristic class + """ + + def __init__(self, category: Category, value: Value, unit: Unit) -> None: + super().__init__(category, value, unit) + + def from_dict(self, dict: Dict): + return super().from_dict(dict) From 013705219c836c17300a760a09066ea70aa6523a Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Tue, 26 Sep 2023 14:01:03 +0200 Subject: [PATCH 09/62] Updated validation --- read-isa-json/ena_objects/ena_study.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/read-isa-json/ena_objects/ena_study.py b/read-isa-json/ena_objects/ena_study.py index 7777dfe..d8168ba 100644 --- a/read-isa-json/ena_objects/ena_study.py +++ b/read-isa-json/ena_objects/ena_study.py @@ -1,6 +1,6 @@ from typing import List, Optional, Dict from pandas import DataFrame -from ena_objects.ena_std_lib import filter_attribute_by, validate_isa_json +from ena_objects.ena_std_lib import filter_attribute_by, validate_dict def study_publication_ids(publication_isa_json: Dict) -> 
List[int]: @@ -34,6 +34,8 @@ def study_alias(study_isa_json: str) -> str: class EnaStudy: """Generates a Study object, compliant to the requirements of ENA""" + mandatory_keys = ["title", "description", "publications"] + def __init__( self, alias: str, @@ -59,8 +61,7 @@ def from_isa_json(isa_json: Dict): Returns: EnaStudy: EnaStudy object """ - mandatory_keys = ["title", "description", "publications"] - [validate_isa_json(isa_json, key) for key in mandatory_keys] + [validate_dict(isa_json, key) for key in EnaStudy.mandatory_keys] return [ EnaStudy( From 6493e4e5f7084e85994cf1ae353766ea931438d1 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Tue, 26 Sep 2023 20:14:56 +0200 Subject: [PATCH 10/62] Implementation of the Ena Samples functionality --- read-isa-json/ena_objects/ena_sample.py | 111 +++++++++++++++++++++--- read-isa-json/read_isa_json.py | 14 ++- 2 files changed, 114 insertions(+), 11 deletions(-) diff --git a/read-isa-json/ena_objects/ena_sample.py b/read-isa-json/ena_objects/ena_sample.py index 2e40ef8..4308e6e 100644 --- a/read-isa-json/ena_objects/ena_sample.py +++ b/read-isa-json/ena_objects/ena_sample.py @@ -1,19 +1,110 @@ +import re from typing import List, Dict +from pandas import DataFrame + + +def study_characteristic_category_name(study_dict: Dict, id: str) -> Dict: + char_cat_dicts = [ + {"id": cc["@id"], "name": cc["characteristicType"]["annotationValue"]} + for cc in study_dict["characteristicCategories"] + ] + + for ccd in char_cat_dicts: + if ccd["id"] == id: + return ccd["name"] + + +def fetch_characteristics(sample_dict: Dict, study_dict: Dict) -> List[Dict]: + return [ + { + "category_id": char["category"]["@id"], + "category_name": study_characteristic_category_name( + study_dict, char["category"]["@id"] + ), + "value": char["value"]["annotationValue"], + } + for char in sample_dict["characteristics"] + ] + + +def associated_source(sample_dict: Dict, study_dict: Dict) -> List[str]: + sample_id = sample_dict["@id"] + for process 
in study_dict["processSequence"]: + input_ids = [input["@id"] for input in process["inputs"]] + output_ids = [output["@id"] for output in process["outputs"]] + if sample_id in output_ids: + return input_ids + + +def associated_source_characteristics(sources_data: Dict, ids: List[str]) -> Dict: + for sd in sources_data: + if sd["id"] in ids: + return sd["characteristics"] + + +def sample_alias(id: str): + prefix = "https://datahub.elixir-belgium.org/samples/" # TODO: Replace by something less hard-coded + + sample_id = re.split("/", id)[1] + return prefix + sample_id + class EnaSample: """ Generates an Sample object, compliant to the requirements of ENA """ - def __init__( - self, - alias: str = "Sample alias", - status: str = "sample status", - taxon_id: int = "sample_taon_id", - sample_description: str = "sample_description", - ) -> None: + def __init__(self, characteristics: Dict, alias: str) -> None: self.alias = alias - self.status = status - self.taxon_id = taxon_id - self.sample_description = sample_description + self.characteristics = characteristics + + def __dict__(self): + return { + "alias": self.alias, + "characteristics": self.characteristics, + } + + def from_study_dict(study_dict: Dict) -> None: + sources_data = [ + { + "id": source["@id"], + "name": source["name"], + "characteristics": fetch_characteristics(source, study_dict), + } + for source in study_dict["materials"]["sources"] + ] + + samples_data = [ + { + "id": sample["@id"], + "name": sample["name"], + "characteristics": fetch_characteristics(sample, study_dict), + "source": associated_source(sample, study_dict), + } + for sample in study_dict["materials"]["samples"] + ] + + for sd in samples_data: + for sc in associated_source_characteristics(sources_data, sd["source"]): + sd["characteristics"].append(sc) + + return [ + EnaSample( + alias=sample_alias(sd["id"]), + characteristics=sd["characteristics"], + ) + for sd in samples_data + ] + + +def export_samples_to_dataframe(samples: 
List[EnaSample]): + flat_dicts = [] + for sample in samples: + sample_dict = sample.__dict__() + characteristics = sample_dict.pop("characteristics") + for char in characteristics: + sample_dict.update({char["category_name"]: char["value"]}) + flat_dicts.append(sample_dict) + + return DataFrame.from_dict(flat_dicts) diff --git a/read-isa-json/read_isa_json.py b/read-isa-json/read_isa_json.py index 8c385ab..0b5e491 100644 --- a/read-isa-json/read_isa_json.py +++ b/read-isa-json/read_isa_json.py @@ -1,10 +1,22 @@ import json + +from ena_objects.ena_sample import EnaSample, export_samples_to_dataframe + from ena_objects.ena_study import EnaStudy # Read json file -isa_json_file = open("read-isa-json/test_isa_json_files/test_local_instance.json") +isa_json_file = open( + "read-isa-json/test_isa_json_files/isa_json_test_investigation.json" +) isa_json = json.load(isa_json_file) studies = EnaStudy.from_isa_json(isa_json) study_dfs = [study.to_dataframe() for study in studies] print(study_dfs[0]) + +study_dict = isa_json["studies"][0] + +samples = EnaSample.from_study_dict(study_dict) +samples_df = export_samples_to_dataframe(samples) +print(samples_df) +print("Done!") From 88de5338af7b424dbe4012e65aab8f3a4c23e3a7 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Tue, 26 Sep 2023 20:51:40 +0200 Subject: [PATCH 11/62] remove init file --- read-isa-json/read_isa_json/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 read-isa-json/read_isa_json/__init__.py diff --git a/read-isa-json/read_isa_json/__init__.py b/read-isa-json/read_isa_json/__init__.py deleted file mode 100644 index e69de29..0000000 From b1f4c944882dde9a4411c7336fa0a692dbeee6a5 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Tue, 26 Sep 2023 21:20:36 +0200 Subject: [PATCH 12/62] Add documentation --- read-isa-json/ena_objects/ena_sample.py | 63 ++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git 
a/read-isa-json/ena_objects/ena_sample.py b/read-isa-json/ena_objects/ena_sample.py index 4308e6e..117d42c 100644 --- a/read-isa-json/ena_objects/ena_sample.py +++ b/read-isa-json/ena_objects/ena_sample.py @@ -5,6 +5,15 @@ def study_characteristic_category_name(study_dict: Dict, id: str) -> Dict: + """Retrieves the name of a characteristic id + + Args: + study_dict (Dict): study dictionary + id (str): characteristic ID + + Returns: + Dict: characteristic name corresponding with the ID + """ char_cat_dicts = [ {"id": cc["@id"], "name": cc["characteristicType"]["annotationValue"]} for cc in study_dict["characteristicCategories"] @@ -16,6 +25,15 @@ def study_characteristic_category_name(study_dict: Dict, id: str) -> Dict: def fetch_characteristics(sample_dict: Dict, study_dict: Dict) -> List[Dict]: + """Fetches the characteristics from the given sample dictionary + + Args: + sample_dict (Dict): sample dictionary + study_dict (Dict): study dictionary + + Returns: + List[Dict]: List of characteristic dictionaries + """ return [ { "category_id": char["category"]["@id"], @@ -29,6 +47,15 @@ def fetch_characteristics(sample_dict: Dict, study_dict: Dict) -> List[Dict]: def associated_source(sample_dict: Dict, study_dict: Dict) -> List[str]: + """Retrieves the ID of the source associated with the given sample + + Args: + sample_dict (Dict): sample dictionary + study_dict (Dict): study dictionary + + Returns: + List[str]: List of source ID's + """ sample_id = sample_dict["@id"] for process in study_dict["processSequence"]: input_ids = [input["@id"] for input in process["inputs"]] @@ -38,12 +65,30 @@ def associated_source(sample_dict: Dict, study_dict: Dict) -> List[str]: def associated_source_characteristics(sources_data: Dict, ids: List[str]) -> Dict: + """Retrieves the characteristics of the associated sources, + corresponding with the provided sample ID's + + Args: + sources_data (Dict): dictionary of the sources + ids (List[str]): list of sample ID's + + Returns: + Dict: 
the dictionary of the source characteristics + """ for sd in sources_data: if sd["id"] in ids: return sd["characteristics"] -def sample_alias(id: str): +def sample_alias(id: str) -> str: + """Retrieves the sample's alias + + Args: + id (str): Sample ID + + Returns: + str: Unique string representation of the alias + """ prefix = "https://datahub.elixir-belgium.org/samples/" # TODO: Replace by something less hard-coded sample_id = re.split("/", id)[1] @@ -66,6 +111,14 @@ def __dict__(self): } def from_study_dict(study_dict: Dict) -> None: + """Generate sample objects from a study dictionary + + Args: + study_dict (Dict): study dictionary + + Returns: + List[EnaSample]: List of Ena Sample objects + """ sources_data = [ { "id": source["@id"], @@ -99,6 +152,14 @@ def from_study_dict(study_dict: Dict) -> None: def export_samples_to_dataframe(samples: List[EnaSample]): + """Exports a list of Ena Samples to a pandas DataFrame + + Args: + samples (List[EnaSample]): Ena sample list + + Returns: + DataFrame: pandas DataFrame + """ flat_dicts = [] for sample in samples: sample_dict = sample.__dict__() From dc2d2eddeb5693a07fa904ec29a537b6089e959d Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Wed, 27 Sep 2023 09:04:20 +0200 Subject: [PATCH 13/62] - Implement Submission wrapper object - Add runs and experiments to Study object --- read-isa-json/ena_objects/ena_study.py | 26 ++++++++++++++++++++- read-isa-json/ena_objects/ena_submission.py | 18 +++++++++++--- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/read-isa-json/ena_objects/ena_study.py b/read-isa-json/ena_objects/ena_study.py index d8168ba..c0e2dc7 100644 --- a/read-isa-json/ena_objects/ena_study.py +++ b/read-isa-json/ena_objects/ena_study.py @@ -1,5 +1,8 @@ from typing import List, Optional, Dict from pandas import DataFrame +from ena_objects.ena_run import EnaRun +from ena_objects.ena_experiment import EnaExperiment +from ena_objects.ena_sample import EnaSample from ena_objects.ena_std_lib 
import filter_attribute_by, validate_dict @@ -42,6 +45,9 @@ def __init__( title: str, study_type: str, study_abstract: str, + runs: List[EnaRun], + samples: List[EnaSample], + experiments: List[EnaExperiment], new_study_type: Optional[str] = None, pubmed_id: Optional[List[int]] = None, ) -> None: @@ -50,8 +56,22 @@ def __init__( self.study_type = study_type self.new_study_type = new_study_type self.study_abstract = study_abstract + self.runs = runs + self.samples = samples + self.experiments = experiments + self.new_study_type = new_study_type self.pubmed_id = pubmed_id + def __dict__(self): + return { + "alias": self.alias, + "title": self.title, + "study_type": self.study_type, + "study_abstract": self.study_abstract, + "new_study_type": self.new_study_type, + "pubmed_id": self.pubmed_id, + } + def from_isa_json(isa_json: Dict): """Method that creates an EnaStudy with params from ISA JSON Dictionary @@ -69,6 +89,10 @@ def from_isa_json(isa_json: Dict): title=study["title"], study_type="", # TODO: Replace by Custom metadata of the Assay level study_abstract=study["description"], + runs=None, + samples=EnaSample.from_study_dict(study), + experiments=None, + new_study_type=None, pubmed_id=study_publication_ids( publication_isa_json=study["publications"] ), @@ -82,4 +106,4 @@ def to_dataframe(self) -> DataFrame: Returns: DataFrame: Pandas DataFrame representation of the Study """ - return DataFrame.from_dict(vars(self)) + return DataFrame.from_dict(self.__dict__()) diff --git a/read-isa-json/ena_objects/ena_submission.py b/read-isa-json/ena_objects/ena_submission.py index d7eb76f..c8abf60 100644 --- a/read-isa-json/ena_objects/ena_submission.py +++ b/read-isa-json/ena_objects/ena_submission.py @@ -1,5 +1,6 @@ from typing import List, Dict -from ena_objects.ena_run import EnaRun + +from ena_objects.ena_study import EnaStudy class EnaSubmission: @@ -7,5 +8,16 @@ class EnaSubmission: Generates a Submission object, compliant to the requirements of ENA """ - def 
__init__(self, runs: List[EnaRun]) -> None: - self.runs = runs + def __init__( + self, + studies: List[EnaStudy] = [], + ) -> None: + self.studies = studies + + def from_isa_json(isa_json: Dict) -> None: + return EnaSubmission( + studies=EnaStudy.from_isa_json(isa_json), + ) + + def generate_dataframes(): + pass From 2ff3ca1738a63f41a0f2bbad5c49ce395acb504b Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Wed, 27 Sep 2023 09:04:55 +0200 Subject: [PATCH 14/62] Add Test Class for Ena Study --- tests/test_ena_study.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 tests/test_ena_study.py diff --git a/tests/test_ena_study.py b/tests/test_ena_study.py new file mode 100644 index 0000000..e048c17 --- /dev/null +++ b/tests/test_ena_study.py @@ -0,0 +1,18 @@ +import pytest +from read_isa_json.ena_objects.ena_study import EnaStudy + + +class TestEnaStudy: + """Test class for Ena Study objects""" + + def test_should_raise_key_error(self): + bad_dict = { + "title": "My Title", + "study_description": "Should 'description'", + "publications": None, + } + + with pytest.raises( + KeyError, match="description was not found in the provided ISA JSON." 
+ ): + EnaStudy.from_isa_json(bad_dict) From 1a5cbff7751600cd79dce9fe469cf1a1022457d8 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Wed, 27 Sep 2023 09:05:45 +0200 Subject: [PATCH 15/62] Renamed read-isa-json folder --- {read-isa-json => read_isa_json}/ena_objects/characteristic.py | 0 {read-isa-json => read_isa_json}/ena_objects/ena_experiment.py | 0 {read-isa-json => read_isa_json}/ena_objects/ena_run.py | 0 {read-isa-json => read_isa_json}/ena_objects/ena_sample.py | 0 {read-isa-json => read_isa_json}/ena_objects/ena_std_lib.py | 0 {read-isa-json => read_isa_json}/ena_objects/ena_study.py | 0 {read-isa-json => read_isa_json}/ena_objects/ena_submission.py | 0 {read-isa-json => read_isa_json}/ena_objects/other_material.py | 0 .../ena_objects/other_material_characteristic.py | 0 {read-isa-json => read_isa_json}/ena_objects/parameter_value.py | 0 {read-isa-json => read_isa_json}/read_isa_json.py | 0 .../test_isa_json_files/isa_json_test_investigation.json | 0 12 files changed, 0 insertions(+), 0 deletions(-) rename {read-isa-json => read_isa_json}/ena_objects/characteristic.py (100%) rename {read-isa-json => read_isa_json}/ena_objects/ena_experiment.py (100%) rename {read-isa-json => read_isa_json}/ena_objects/ena_run.py (100%) rename {read-isa-json => read_isa_json}/ena_objects/ena_sample.py (100%) rename {read-isa-json => read_isa_json}/ena_objects/ena_std_lib.py (100%) rename {read-isa-json => read_isa_json}/ena_objects/ena_study.py (100%) rename {read-isa-json => read_isa_json}/ena_objects/ena_submission.py (100%) rename {read-isa-json => read_isa_json}/ena_objects/other_material.py (100%) rename {read-isa-json => read_isa_json}/ena_objects/other_material_characteristic.py (100%) rename {read-isa-json => read_isa_json}/ena_objects/parameter_value.py (100%) rename {read-isa-json => read_isa_json}/read_isa_json.py (100%) rename {read-isa-json => read_isa_json}/test_isa_json_files/isa_json_test_investigation.json (100%) diff --git 
a/read-isa-json/ena_objects/characteristic.py b/read_isa_json/ena_objects/characteristic.py similarity index 100% rename from read-isa-json/ena_objects/characteristic.py rename to read_isa_json/ena_objects/characteristic.py diff --git a/read-isa-json/ena_objects/ena_experiment.py b/read_isa_json/ena_objects/ena_experiment.py similarity index 100% rename from read-isa-json/ena_objects/ena_experiment.py rename to read_isa_json/ena_objects/ena_experiment.py diff --git a/read-isa-json/ena_objects/ena_run.py b/read_isa_json/ena_objects/ena_run.py similarity index 100% rename from read-isa-json/ena_objects/ena_run.py rename to read_isa_json/ena_objects/ena_run.py diff --git a/read-isa-json/ena_objects/ena_sample.py b/read_isa_json/ena_objects/ena_sample.py similarity index 100% rename from read-isa-json/ena_objects/ena_sample.py rename to read_isa_json/ena_objects/ena_sample.py diff --git a/read-isa-json/ena_objects/ena_std_lib.py b/read_isa_json/ena_objects/ena_std_lib.py similarity index 100% rename from read-isa-json/ena_objects/ena_std_lib.py rename to read_isa_json/ena_objects/ena_std_lib.py diff --git a/read-isa-json/ena_objects/ena_study.py b/read_isa_json/ena_objects/ena_study.py similarity index 100% rename from read-isa-json/ena_objects/ena_study.py rename to read_isa_json/ena_objects/ena_study.py diff --git a/read-isa-json/ena_objects/ena_submission.py b/read_isa_json/ena_objects/ena_submission.py similarity index 100% rename from read-isa-json/ena_objects/ena_submission.py rename to read_isa_json/ena_objects/ena_submission.py diff --git a/read-isa-json/ena_objects/other_material.py b/read_isa_json/ena_objects/other_material.py similarity index 100% rename from read-isa-json/ena_objects/other_material.py rename to read_isa_json/ena_objects/other_material.py diff --git a/read-isa-json/ena_objects/other_material_characteristic.py b/read_isa_json/ena_objects/other_material_characteristic.py similarity index 100% rename from 
read-isa-json/ena_objects/other_material_characteristic.py rename to read_isa_json/ena_objects/other_material_characteristic.py diff --git a/read-isa-json/ena_objects/parameter_value.py b/read_isa_json/ena_objects/parameter_value.py similarity index 100% rename from read-isa-json/ena_objects/parameter_value.py rename to read_isa_json/ena_objects/parameter_value.py diff --git a/read-isa-json/read_isa_json.py b/read_isa_json/read_isa_json.py similarity index 100% rename from read-isa-json/read_isa_json.py rename to read_isa_json/read_isa_json.py diff --git a/read-isa-json/test_isa_json_files/isa_json_test_investigation.json b/read_isa_json/test_isa_json_files/isa_json_test_investigation.json similarity index 100% rename from read-isa-json/test_isa_json_files/isa_json_test_investigation.json rename to read_isa_json/test_isa_json_files/isa_json_test_investigation.json From 60fd7e1ea74e823c0cdbfd660ead2ff07feb1b65 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Wed, 27 Sep 2023 09:06:49 +0200 Subject: [PATCH 16/62] Add init file to the tests folder --- tests/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/__init__.py diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 From 15702ecbd7b5cee61100ec05a354247fe8436803 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Wed, 27 Sep 2023 11:02:20 +0200 Subject: [PATCH 17/62] Fix failing tests --- ena_objects/__init__.py | 0 .../characteristic.py | 0 ena_objects/ena_experiment.py | 102 ++++++++++++++++++ .../ena_objects => ena_objects}/ena_run.py | 5 +- .../ena_objects => ena_objects}/ena_sample.py | 0 .../ena_std_lib.py | 0 .../ena_objects => ena_objects}/ena_study.py | 12 --- .../ena_submission.py | 0 .../other_material.py | 0 .../other_material_characteristic.py | 0 .../parameter_value.py | 0 read_isa_json/ena_objects/ena_experiment.py | 39 ------- tests/test_ena_study.py | 2 +- 13 files changed, 105 insertions(+), 55 deletions(-) 
create mode 100644 ena_objects/__init__.py rename {read_isa_json/ena_objects => ena_objects}/characteristic.py (100%) create mode 100644 ena_objects/ena_experiment.py rename {read_isa_json/ena_objects => ena_objects}/ena_run.py (71%) rename {read_isa_json/ena_objects => ena_objects}/ena_sample.py (100%) rename {read_isa_json/ena_objects => ena_objects}/ena_std_lib.py (100%) rename {read_isa_json/ena_objects => ena_objects}/ena_study.py (86%) rename {read_isa_json/ena_objects => ena_objects}/ena_submission.py (100%) rename {read_isa_json/ena_objects => ena_objects}/other_material.py (100%) rename {read_isa_json/ena_objects => ena_objects}/other_material_characteristic.py (100%) rename {read_isa_json/ena_objects => ena_objects}/parameter_value.py (100%) delete mode 100644 read_isa_json/ena_objects/ena_experiment.py diff --git a/ena_objects/__init__.py b/ena_objects/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/read_isa_json/ena_objects/characteristic.py b/ena_objects/characteristic.py similarity index 100% rename from read_isa_json/ena_objects/characteristic.py rename to ena_objects/characteristic.py diff --git a/ena_objects/ena_experiment.py b/ena_objects/ena_experiment.py new file mode 100644 index 0000000..7547c8e --- /dev/null +++ b/ena_objects/ena_experiment.py @@ -0,0 +1,102 @@ +from typing import List, Dict, Union, Optional + +from ena_objects.ena_std_lib import filter_attribute_by +from ena_objects.characteristic import IsaBase +from ena_objects.ena_sample import EnaSample +from ena_objects.parameter_value import ParameterValue +from ena_objects.other_material import OtherMaterial + + +# def filter_other_material_attribute_by( +# data: List[Dict], +# filter_key: str, +# filter_val: str, +# return_key: str, +# ) -> Union[str, int]: +# return [ +# attribute[return_key] +# for attribute in data +# if attribute[filter_key] == filter_val +# ] + + +def experiment_alias(assay_dict: Dict): + prefix = "https://datahub.elixir-belgium.org/assays/" 
# TODO: Replace by something less hard-coded + seek_assays_id: str = assay_dict["@id"] + return prefix + seek_assays_id + + +def fetch_characteristic_categories(study_dict: Dict): + return [ + {"id": cc["@id"], "value": cc["characteristicType"]["annotationValue"]} + for cc in study_dict["characteristicCategories"] + ] + + +def get_other_materials(study_dict: Dict) -> List[OtherMaterial]: + other_materials = [] + + for study in study_dict["studies"]: + for assay in study["assays"]: + for om in assay["materials"]["otherMaterials"]: + other_materials.append(om) + + return [ + OtherMaterial.from_dict(other_material) for other_material in other_materials + ] + + +# def parameter_id(study_isa_jon: Dict, parameter_name: str) -> Optional[int]: +# for protocol in study_isa_jon["protocols"]: +# for parameter in protocol["parameters"]: +# if parameter["parameterName"]["annotationValue"] == parameter_name: +# return parameter["@id"] + + +# def parameter_value(study_isa_json: Dict, parameter_name: str) -> List[any]: +# pass + + +class EnaExperiment(IsaBase): + """ + Generates an Experiment object, compliant to the requirements of ENA + """ + + mandatory_keys = [ + "alias", + "title", + "study", + "sample", + "parameter_values", + "other_material", + ] + + def __init__( + self, + alias: str, + title: str, + study_alias: str, + sample_alias: str, + library_name: str, + ibrary_strategy, + library_source: str, + library_selection: str, + library_layout: str, + insert_size: str, + library_construction_protocol: str, + platform: str, + instrument_model: str, + ) -> None: + self.alias = alias + self.title = title + self.study = study_alias + self.sample = sample_alias + self.library_name = library_name + self.ibrary_strategy = ibrary_strategy + self.library_source = library_source + self.library_selection = library_selection + self.library_layout = library_layout + self.insert_size = insert_size + self.library_construction_protocol = library_construction_protocol + self.platform = 
platform + self.instrument_model = instrument_model diff --git a/read_isa_json/ena_objects/ena_run.py b/ena_objects/ena_run.py similarity index 71% rename from read_isa_json/ena_objects/ena_run.py rename to ena_objects/ena_run.py index 148c5da..5385d68 100644 --- a/read_isa_json/ena_objects/ena_run.py +++ b/ena_objects/ena_run.py @@ -1,5 +1,4 @@ from typing import List, Dict -from ena_objects.ena_experiment import EnaExperiment class EnaRun: @@ -10,11 +9,11 @@ class EnaRun: def __init__( self, alias: str, - experiments: List[EnaExperiment], + experiment_alias: str, filename: str, file_type: str, ) -> None: self.alias = alias - self.experiments = experiments + self.experiments = experiment_alias self.filename = filename self.file_type = file_type diff --git a/read_isa_json/ena_objects/ena_sample.py b/ena_objects/ena_sample.py similarity index 100% rename from read_isa_json/ena_objects/ena_sample.py rename to ena_objects/ena_sample.py diff --git a/read_isa_json/ena_objects/ena_std_lib.py b/ena_objects/ena_std_lib.py similarity index 100% rename from read_isa_json/ena_objects/ena_std_lib.py rename to ena_objects/ena_std_lib.py diff --git a/read_isa_json/ena_objects/ena_study.py b/ena_objects/ena_study.py similarity index 86% rename from read_isa_json/ena_objects/ena_study.py rename to ena_objects/ena_study.py index c0e2dc7..531121d 100644 --- a/read_isa_json/ena_objects/ena_study.py +++ b/ena_objects/ena_study.py @@ -1,8 +1,5 @@ from typing import List, Optional, Dict from pandas import DataFrame -from ena_objects.ena_run import EnaRun -from ena_objects.ena_experiment import EnaExperiment -from ena_objects.ena_sample import EnaSample from ena_objects.ena_std_lib import filter_attribute_by, validate_dict @@ -45,9 +42,6 @@ def __init__( title: str, study_type: str, study_abstract: str, - runs: List[EnaRun], - samples: List[EnaSample], - experiments: List[EnaExperiment], new_study_type: Optional[str] = None, pubmed_id: Optional[List[int]] = None, ) -> None: @@ -56,9 
+50,6 @@ def __init__( self.study_type = study_type self.new_study_type = new_study_type self.study_abstract = study_abstract - self.runs = runs - self.samples = samples - self.experiments = experiments self.new_study_type = new_study_type self.pubmed_id = pubmed_id @@ -89,9 +80,6 @@ def from_isa_json(isa_json: Dict): title=study["title"], study_type="", # TODO: Replace by Custom metadata of the Assay level study_abstract=study["description"], - runs=None, - samples=EnaSample.from_study_dict(study), - experiments=None, new_study_type=None, pubmed_id=study_publication_ids( publication_isa_json=study["publications"] diff --git a/read_isa_json/ena_objects/ena_submission.py b/ena_objects/ena_submission.py similarity index 100% rename from read_isa_json/ena_objects/ena_submission.py rename to ena_objects/ena_submission.py diff --git a/read_isa_json/ena_objects/other_material.py b/ena_objects/other_material.py similarity index 100% rename from read_isa_json/ena_objects/other_material.py rename to ena_objects/other_material.py diff --git a/read_isa_json/ena_objects/other_material_characteristic.py b/ena_objects/other_material_characteristic.py similarity index 100% rename from read_isa_json/ena_objects/other_material_characteristic.py rename to ena_objects/other_material_characteristic.py diff --git a/read_isa_json/ena_objects/parameter_value.py b/ena_objects/parameter_value.py similarity index 100% rename from read_isa_json/ena_objects/parameter_value.py rename to ena_objects/parameter_value.py diff --git a/read_isa_json/ena_objects/ena_experiment.py b/read_isa_json/ena_objects/ena_experiment.py deleted file mode 100644 index 5572f40..0000000 --- a/read_isa_json/ena_objects/ena_experiment.py +++ /dev/null @@ -1,39 +0,0 @@ -from typing import List, Dict -from ena_objects.ena_study import EnaStudy -from ena_objects.ena_sample import EnaSample - - -class EnaExperiment: - """ - Generates an Experiment object, compliant to the requirements of ENA - """ - - def __init__( - 
self, - alias: str, - title: str, - study: EnaStudy, - sample: EnaSample, - library_name: str, - ibrary_strategy, - library_source: str, - library_selection: str, - library_layout: str, - insert_size: str, - library_construction_protocol: str, - platform: str, - instrument_model: str, - ) -> None: - self.alias = alias - self.title = title - self.study = study - self.sample = sample - self.library_name = library_name - self.ibrary_strategy = ibrary_strategy - self.library_source = library_source - self.library_selection = library_selection - self.library_layout = library_layout - self.insert_size = insert_size - self.library_construction_protocol = library_construction_protocol - self.platform = platform - self.instrument_model = instrument_model diff --git a/tests/test_ena_study.py b/tests/test_ena_study.py index e048c17..e5bbb38 100644 --- a/tests/test_ena_study.py +++ b/tests/test_ena_study.py @@ -1,5 +1,5 @@ import pytest -from read_isa_json.ena_objects.ena_study import EnaStudy +from ena_objects.ena_study import EnaStudy class TestEnaStudy: From 76c8ae651eff2de10c9da0311a1c4e5763f61164 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Wed, 27 Sep 2023 11:35:48 +0200 Subject: [PATCH 18/62] Rename test file to accomodate other class objects --- .../test_data}/isa_json_test_investigation.json | 0 tests/{test_ena_study.py => test_ena_objects.py} | 14 +++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) rename {read_isa_json/test_isa_json_files => tests/test_data}/isa_json_test_investigation.json (100%) rename tests/{test_ena_study.py => test_ena_objects.py} (71%) diff --git a/read_isa_json/test_isa_json_files/isa_json_test_investigation.json b/tests/test_data/isa_json_test_investigation.json similarity index 100% rename from read_isa_json/test_isa_json_files/isa_json_test_investigation.json rename to tests/test_data/isa_json_test_investigation.json diff --git a/tests/test_ena_study.py b/tests/test_ena_objects.py similarity index 71% rename from 
tests/test_ena_study.py rename to tests/test_ena_objects.py index e5bbb38..0af49d5 100644 --- a/tests/test_ena_study.py +++ b/tests/test_ena_objects.py @@ -1,5 +1,5 @@ import pytest -from ena_objects.ena_study import EnaStudy +from ena_objects.ena_study import EnaStudy, EnaSample, EnaExperiment, EnaRun class TestEnaStudy: @@ -16,3 +16,15 @@ def test_should_raise_key_error(self): KeyError, match="description was not found in the provided ISA JSON." ): EnaStudy.from_isa_json(bad_dict) + + +class TestEnaSample: + pass + + +class TestEnaExperiment: + pass + + +class TestEnaRun: + pass From 138c596be51e992008710ba9f591fe6d64ea5973 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Wed, 27 Sep 2023 11:58:48 +0200 Subject: [PATCH 19/62] Add test for reading an ISA JSON and producing studies --- tests/test_ena_objects.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/test_ena_objects.py b/tests/test_ena_objects.py index 0af49d5..2931c57 100644 --- a/tests/test_ena_objects.py +++ b/tests/test_ena_objects.py @@ -1,6 +1,12 @@ +import os import pytest +import json from ena_objects.ena_study import EnaStudy, EnaSample, EnaExperiment, EnaRun +test_isa_jsonfile = open("tests/test_data/isa_json_test_investigation.json") + +test_isa_json = json.load(test_isa_jsonfile) + class TestEnaStudy: """Test class for Ena Study objects""" @@ -17,6 +23,21 @@ def test_should_raise_key_error(self): ): EnaStudy.from_isa_json(bad_dict) + def test_ena_study_creation(self): + studies = EnaStudy.from_isa_json(test_isa_json) + + assert len(studies) == 1 + assert studies[0].__dict__() == dict( + { + "alias": "https://datahub.elixir-belgium.org/studies/2", + "title": "ENA Upload Study", + "study_type": "", + "study_abstract": "", + "new_study_type": None, + "pubmed_id": [1], + } + ) + class TestEnaSample: pass From eb9eed36a4e44730efb6a3374b080a2894cf7ac0 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Wed, 27 Sep 2023 12:23:16 +0200 Subject: [PATCH 20/62] 
Delete read_isa_json folder and move example script --- .../read_isa_json.py => example_read_isa_json.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) rename read_isa_json/read_isa_json.py => example_read_isa_json.py (62%) diff --git a/read_isa_json/read_isa_json.py b/example_read_isa_json.py similarity index 62% rename from read_isa_json/read_isa_json.py rename to example_read_isa_json.py index 0b5e491..3ffb8d3 100644 --- a/read_isa_json/read_isa_json.py +++ b/example_read_isa_json.py @@ -1,13 +1,11 @@ import json -from ena_objects.ena_sample import EnaSample, export_samples_to_dataframe +from ena_objects.ena_sample import export_samples_to_dataframe -from ena_objects.ena_study import EnaStudy +from ena_objects.ena_study import EnaStudy, EnaSample # Read json file -isa_json_file = open( - "read-isa-json/test_isa_json_files/isa_json_test_investigation.json" -) +isa_json_file = open("tests/test_data/isa_json_test_investigation.json") isa_json = json.load(isa_json_file) studies = EnaStudy.from_isa_json(isa_json) From b0cd314299fc3dd96c34143c339636ee56dd8307 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Wed, 27 Sep 2023 21:29:50 +0200 Subject: [PATCH 21/62] Worked on parsing experiment data for ENA submissions --- ena_objects/characteristic.py | 58 ++++-- ena_objects/ena_experiment.py | 205 +++++++++++++------ ena_objects/ena_sample.py | 9 +- ena_objects/ena_study.py | 50 +++-- ena_objects/other_material.py | 13 +- ena_objects/other_material_characteristic.py | 8 +- ena_objects/parameter_value.py | 8 +- example_read_isa_json.py | 7 +- tests/test_ena_objects.py | 40 ++-- 9 files changed, 268 insertions(+), 130 deletions(-) diff --git a/ena_objects/characteristic.py b/ena_objects/characteristic.py index e6890b4..f722bdd 100644 --- a/ena_objects/characteristic.py +++ b/ena_objects/characteristic.py @@ -1,12 +1,25 @@ from typing import List, Dict + +from decopatch import class_decorator +from exceptiongroup import catch from 
ena_objects.ena_std_lib import validate_dict +def fetch_category_name(categories: Dict, name: str) -> str: + for cat in categories: + if name["@id"] == cat["id"]: + if "name" in cat: + return cat["name"] + elif "value" in cat: + return cat["value"] + + class IsaBase: """ This is the base class """ + @classmethod def check_dict_keys(self, dict: Dict, mandatory_keys): [validate_dict(dict=dict, key=key) for key in mandatory_keys] @@ -16,15 +29,17 @@ class Category(IsaBase): This represents a category object in a Characteristic """ - def __init__(self, id: str) -> None: + def __init__(self, id: str, name: str) -> None: self.id = id + self.name = name - mandatory_keys = ["id"] + mandatory_keys = ["@id"] - def from_dict(self, dict: Dict): + @classmethod + def from_dict(self, dict: Dict, categories: Dict): super().check_dict_keys(dict, self.mandatory_keys) - return Category(id=dict["id"]) + return Category(id=dict["@id"], name=fetch_category_name(categories, dict)) class Value(IsaBase): @@ -32,7 +47,7 @@ class Value(IsaBase): This represents a Value object in a Characteristic """ - mandatory_keys = ["annotation_value", "term_accession", "term_source"] + mandatory_keys = ["annotationValue", "termSource", "termAccession"] def __init__( self, annotation_value: str, term_source: str = "", term_accession: str = "" @@ -41,13 +56,14 @@ def __init__( self.term_source = term_source self.term_accession = term_accession + @classmethod def from_dict(self, dict: Dict): super().check_dict_keys(dict, self.mandatory_keys) - return Unit( - annotation_value=dict["annotation_value"], - term_accession=dict["term_accession"], - term_source=dict["term_source"], + return Value( + annotation_value=dict["annotationValue"], + term_accession=dict["termAccession"], + term_source=dict["termSource"], ) @@ -56,21 +72,22 @@ class Unit(IsaBase): This represents the Unit object in a Characteristic """ - mandatory_keys = ["tern_source", "term_accession", "comments"] + mandatory_keys = ["termSource", 
"termAccession", "comments"] def __init__( - self, term_source: str, term_accession: str, comments: List[any] + self, term_source: str, term_accession: str, comments: List[str] ) -> None: self.term_source = term_source self.term_accession = term_accession self.comments = comments + @classmethod def from_dict(self, dict: Dict): super().check_dict_keys(dict, self.mandatory_keys) return Unit( - term_source=dict["term_source"], - term_accession=dict["term_accession"], + term_source=dict["termSource"], + term_accession=dict["termAccession"], comments=dict["comments"], ) @@ -81,17 +98,24 @@ class Characteristic(IsaBase): """ mandatory_keys = ["category", "value", "unit"] + parameters = [] def __init__(self, category: Category, value: Value, unit: Unit) -> None: self.category = category self.value = value self.unit = unit - def from_dict(self, dict: Dict): + @classmethod + def from_dict(self, dict: Dict, categories: Dict): super().check_dict_keys(dict, self.mandatory_keys) - - return Unit( - category=Category.from_dict(dict["category"]), + return self( + category=Category.from_dict(dict["category"], categories), value=Value.from_dict(dict["value"]), unit=Unit.from_dict(dict["unit"]), ) + + def to_dict(self) -> Dict: + return { + "category": self.category.name, + "value": self.value.annotation_value, + } diff --git a/ena_objects/ena_experiment.py b/ena_objects/ena_experiment.py index 7547c8e..d6e0559 100644 --- a/ena_objects/ena_experiment.py +++ b/ena_objects/ena_experiment.py @@ -1,29 +1,20 @@ +import re from typing import List, Dict, Union, Optional -from ena_objects.ena_std_lib import filter_attribute_by +from pandas import DataFrame +from ena_objects import other_material_characteristic + +from ena_objects.ena_std_lib import filter_attribute_by, validate_dict from ena_objects.characteristic import IsaBase from ena_objects.ena_sample import EnaSample +from ena_objects.other_material_characteristic import OtherMaterialCharacteristic from ena_objects.parameter_value 
import ParameterValue from ena_objects.other_material import OtherMaterial -# def filter_other_material_attribute_by( -# data: List[Dict], -# filter_key: str, -# filter_val: str, -# return_key: str, -# ) -> Union[str, int]: -# return [ -# attribute[return_key] -# for attribute in data -# if attribute[filter_key] == filter_val -# ] - - -def experiment_alias(assay_dict: Dict): - prefix = "https://datahub.elixir-belgium.org/assays/" # TODO: Replace by something less hard-coded - seek_assays_id: str = assay_dict["@id"] - return prefix + seek_assays_id +def experiment_alias(other_material: OtherMaterial): + seek_assays_id: str = re.split("/", other_material.id)[1] + return EnaExperiment.prefix + seek_assays_id def fetch_characteristic_categories(study_dict: Dict): @@ -35,26 +26,70 @@ def fetch_characteristic_categories(study_dict: Dict): def get_other_materials(study_dict: Dict) -> List[OtherMaterial]: other_materials = [] - - for study in study_dict["studies"]: - for assay in study["assays"]: - for om in assay["materials"]["otherMaterials"]: - other_materials.append(om) - - return [ - OtherMaterial.from_dict(other_material) for other_material in other_materials - ] - - -# def parameter_id(study_isa_jon: Dict, parameter_name: str) -> Optional[int]: -# for protocol in study_isa_jon["protocols"]: -# for parameter in protocol["parameters"]: -# if parameter["parameterName"]["annotationValue"] == parameter_name: -# return parameter["@id"] - - -# def parameter_value(study_isa_json: Dict, parameter_name: str) -> List[any]: -# pass + characteristics_categories = fetch_characteristic_categories(study_dict) + # parameters = fetch_parameters() + for assay in study_dict["assays"]: + for om in assay["materials"]["otherMaterials"]: + other_material = OtherMaterial.from_dict( + dict=om, characteristics_categories=characteristics_categories + ) + other_materials.append(other_material) + + return other_materials + + +def library_names(study_dict: Dict) -> List[str]: + return [om["name"] 
for om in get_other_materials(study_dict)] + + +def sample_associations(assay_dict: Dict): + process_sequence = [] + for process in assay_dict["processSequence"]: + input_ids = [input["@id"] for input in process["inputs"]] + output_ids = [output["@id"] for output in process["outputs"]] + process_sequence.append({"input": input_ids, "output": output_ids}) + + return process_sequence + + +def get_derived_sample_alias(other_material: OtherMaterial, study_dict: Dict) -> str: + assoc_sample_ids = [] + for assay in study_dict["assays"]: + for sa in sample_associations(assay): + if other_material.id in sa["output"]: + for input in sa["input"]: + alias = EnaSample.prefix + re.split("/", input)[-1] + assoc_sample_ids.append(alias) + return assoc_sample_ids + + +def fetch_parameters(protocol_dict: Dict): + parameters = [] + for protocol in protocol_dict: + for parameter in protocol["parameters"]: + parameters.append( + { + "id": parameter["@id"], + "name": parameter["parameterName"]["annotationValue"], + } + ) + return parameters + + +def get_parameter_values(study_dict: Dict) -> Dict: + param_vals = [] + parameters = fetch_parameters(study_dict["protocols"]) + for assay in study_dict["assays"]: + for ps in assay["processSequence"]: + sample_id = re.split("/", ps["@id"])[-1] + parameter_values = [ + ParameterValue.from_dict(parameter_value, parameters) + for parameter_value in ps["parameterValues"] + ] + param_vals.append( + {"sample_id": sample_id, "paramter_values": parameter_values} + ) + return param_vals class EnaExperiment(IsaBase): @@ -63,40 +98,82 @@ class EnaExperiment(IsaBase): """ mandatory_keys = [ - "alias", - "title", - "study", - "sample", - "parameter_values", - "other_material", + "protocols", + "materials", + "processSequence", + "assays", ] + prefix = "https://datahub.elixir-belgium.org/assays/" # TODO: Replace by something less hard-coded def __init__( self, alias: str, - title: str, study_alias: str, sample_alias: str, library_name: str, - 
ibrary_strategy, - library_source: str, - library_selection: str, - library_layout: str, - insert_size: str, - library_construction_protocol: str, - platform: str, - instrument_model: str, + parameter_values: List[ParameterValue] = [], + other_material_characteristics: List[OtherMaterialCharacteristic] = [], ) -> None: self.alias = alias - self.title = title - self.study = study_alias - self.sample = sample_alias + self.study_alias = study_alias + self.sample_alias = sample_alias self.library_name = library_name - self.ibrary_strategy = ibrary_strategy - self.library_source = library_source - self.library_selection = library_selection - self.library_layout = library_layout - self.insert_size = insert_size - self.library_construction_protocol = library_construction_protocol - self.platform = platform - self.instrument_model = instrument_model + self.parameter_values = parameter_values + self.other_material_characteristics = other_material_characteristics + + def to_dict(self) -> Dict: + return { + "alias": self.alias, + "study_alias": self.sample_alias, + "sample_alias": self.sample_alias, + "library_name": self.library_name, + "parameter_values": [pv for pv in self.parameter_values], + "other_material_characteristics": [ + omc.to_dict() for omc in self.other_material_characteristics + ], + } + + def from_study_dict(study_dict: Dict, study_alias): + [validate_dict(study_dict, key) for key in EnaExperiment.mandatory_keys] + + other_materials = get_other_materials(study_dict) + + # protocol_parameters = protocol_parameters(study_dict["protocols"]) + parameter_values = get_parameter_values(study_dict) + + ena_experiments = [] + for om in other_materials: + om_id = re.split("/", om.id)[-1] + ena_experiments.append( + EnaExperiment( + alias=experiment_alias(om), + library_name=om.name, + study_alias=study_alias, + sample_alias=get_derived_sample_alias(om, study_dict), + parameter_values=list( + filter(lambda pv: pv["sample_id"] == om_id, parameter_values) + ), + 
other_material_characteristics=om.other_material_characteristics, + ) + ) + return ena_experiments + + +def export_experiments_to_dataframe(experiments: List[EnaExperiment]) -> DataFrame: + flat_dicts = [] + for experiment in experiments: + experiment_dict = experiment.to_dict() + other_material_characteristics = experiment_dict.pop( + "other_material_characteristics" + ) + # omc_dicts = [omc.to_dict() for omc in other_material_characteristics] + + parameter_values = experiment_dict.pop("parameter_values") + # pv_dicts = [pv.to_dict() for pv in parameter_values] + + for omc in omc_dicts: + experiment_dict.update({omc["category"]: omc["value"]}) + for pv in pv_dicts: + experiment_dict.update({pv["category"]: pv["value"]}) + flat_dicts.append(experiment_dict) + return DataFrame.from_dict(flat_dicts) diff --git a/ena_objects/ena_sample.py b/ena_objects/ena_sample.py index 117d42c..bbceb66 100644 --- a/ena_objects/ena_sample.py +++ b/ena_objects/ena_sample.py @@ -89,10 +89,9 @@ def sample_alias(id: str) -> str: Returns: str: Unique string representation of the alias """ - prefix = "https://datahub.elixir-belgium.org/samples/" # TODO: Replace by something less hard-coded sample_id = re.split("/", id)[1] - return prefix + sample_id + return EnaSample.prefix + sample_id class EnaSample: @@ -100,11 +99,13 @@ class EnaSample: Generates an Sample object, compliant to the requirements of ENA """ + prefix: str = "https://datahub.elixir-belgium.org/samples/" # TODO: Replace by something less hard-coded + def __init__(self, characteristics: Dict, alias: str) -> None: self.alias = alias self.characteristics = characteristics - def __dict__(self): + def to_dict(self) -> Dict: return { "alias": self.alias, "characteristics": self.characteristics, @@ -162,7 +163,7 @@ def export_samples_to_dataframe(samples: List[EnaSample]): """ flat_dicts = [] for sample in samples: - sample_dict = sample.__dict__() + sample_dict = sample.to_dict() characteristics = 
sample_dict.pop("characteristics") for char in characteristics: sample_dict.update({char["category_name"]: char["value"]}) diff --git a/ena_objects/ena_study.py b/ena_objects/ena_study.py index 531121d..f64e6d1 100644 --- a/ena_objects/ena_study.py +++ b/ena_objects/ena_study.py @@ -1,5 +1,8 @@ from typing import List, Optional, Dict from pandas import DataFrame +from ena_objects.ena_experiment import EnaExperiment +from ena_objects.ena_run import EnaRun +from ena_objects.ena_sample import EnaSample from ena_objects.ena_std_lib import filter_attribute_by, validate_dict @@ -24,17 +27,17 @@ def study_alias(study_isa_json: str) -> str: Returns: str: the study_alias """ - prefix = "https://datahub.elixir-belgium.org/studies/" # TODO: Replace by something less hard-coded seek_study_id: str = filter_attribute_by( study_isa_json["comments"], key="name", value="SEEK Study ID" )[0]["value"] - return prefix + seek_study_id + return EnaStudy.prefix + seek_study_id class EnaStudy: """Generates a Study object, compliant to the requirements of ENA""" mandatory_keys = ["title", "description", "publications"] + prefix = "https://datahub.elixir-belgium.org/studies/" # TODO: Replace by something less hard-coded def __init__( self, @@ -42,6 +45,9 @@ def __init__( title: str, study_type: str, study_abstract: str, + samples: List[EnaSample], + experiments: List[EnaExperiment] = [], + runs: List[EnaRun] = [], new_study_type: Optional[str] = None, pubmed_id: Optional[List[int]] = None, ) -> None: @@ -53,7 +59,11 @@ def __init__( self.new_study_type = new_study_type self.pubmed_id = pubmed_id - def __dict__(self): + self.samples = samples + self.experiments = experiments + self.runs = runs + + def to_dict(self): return { "alias": self.alias, "title": self.title, @@ -74,19 +84,27 @@ def from_isa_json(isa_json: Dict): """ [validate_dict(isa_json, key) for key in EnaStudy.mandatory_keys] - return [ - EnaStudy( - alias=study_alias(study), - title=study["title"], - study_type="", # TODO: 
Replace by Custom metadata of the Assay level - study_abstract=study["description"], - new_study_type=None, - pubmed_id=study_publication_ids( - publication_isa_json=study["publications"] - ), + ena_studies = [] + + for study in isa_json["studies"]: + ena_samples = EnaSample.from_study_dict(study) + + ena_studies.append( + EnaStudy( + alias=study_alias(study), + title=study["title"], + study_type="", # TODO: Replace by Custom metadata of the Assay level + study_abstract=study["description"], + new_study_type=None, + samples=ena_samples, + experiments=EnaExperiment, + pubmed_id=study_publication_ids( + publication_isa_json=study["publications"] + ), + ) ) - for study in isa_json["studies"] - ] + + return ena_studies def to_dataframe(self) -> DataFrame: """Dumps the study object in a pandas DataFrame of the object @@ -94,4 +112,4 @@ def to_dataframe(self) -> DataFrame: Returns: DataFrame: Pandas DataFrame representation of the Study """ - return DataFrame.from_dict(self.__dict__()) + return DataFrame.from_dict(self.to_dict()) diff --git a/ena_objects/other_material.py b/ena_objects/other_material.py index 223a43c..a50fe41 100644 --- a/ena_objects/other_material.py +++ b/ena_objects/other_material.py @@ -25,13 +25,14 @@ def __init__( self.type = type self.other_material_characteristics = other_material_characteristics - def from_dict(self, dict): - super().check_dict_keys(dict, self.mandatory_keys) + @classmethod + def from_dict(cls, dict, characteristics_categories): return OtherMaterial( - id=dict["id"], + id=dict["@id"], name=dict["name"], type=dict["type"], - other_material_characteristics=OtherMaterialCharacteristic.from_dict( - dict["other_material_characteristics"] - ), + other_material_characteristics=[ + OtherMaterialCharacteristic.from_dict(char, characteristics_categories) + for char in dict["characteristics"] + ], ) diff --git a/ena_objects/other_material_characteristic.py b/ena_objects/other_material_characteristic.py index 55c411b..e101c5b 100644 --- 
a/ena_objects/other_material_characteristic.py +++ b/ena_objects/other_material_characteristic.py @@ -10,5 +10,9 @@ class OtherMaterialCharacteristic(Characteristic): def __init__(self, category: Category, value: Value, unit: Unit) -> None: super().__init__(category, value, unit) - def from_dict(self, dict: Dict): - return super().from_dict(dict) + @classmethod + def from_dict(cls, dict: Dict, characteristics_categories: Dict): + return super().from_dict(dict, characteristics_categories) + + def to_dict(self) -> Dict: + return super().to_dict() diff --git a/ena_objects/parameter_value.py b/ena_objects/parameter_value.py index 3f0f99c..326464d 100644 --- a/ena_objects/parameter_value.py +++ b/ena_objects/parameter_value.py @@ -11,5 +11,9 @@ class ParameterValue(Characteristic): def __init__(self, category: Category, value: Value, unit: Unit) -> None: super().__init__(category, value, unit) - def from_dict(self, dict: Dict): - return super().from_dict(dict) + @classmethod + def from_dict(self, dict: Dict, parameters: Dict): + return super().from_dict(dict, parameters) + + def to_dict(self) -> Dict: + return super().to_dict() diff --git a/example_read_isa_json.py b/example_read_isa_json.py index 3ffb8d3..9c271c2 100644 --- a/example_read_isa_json.py +++ b/example_read_isa_json.py @@ -1,8 +1,9 @@ import json +from ena_objects.ena_experiment import export_experiments_to_dataframe from ena_objects.ena_sample import export_samples_to_dataframe -from ena_objects.ena_study import EnaStudy, EnaSample +from ena_objects.ena_study import EnaStudy, EnaSample, EnaExperiment # Read json file isa_json_file = open("tests/test_data/isa_json_test_investigation.json") @@ -17,4 +18,8 @@ samples = EnaSample.from_study_dict(study_dict) samples_df = export_samples_to_dataframe(samples) print(samples_df) + +experiments = EnaExperiment.from_study_dict(study_dict, studies[0].alias) +experiments_df = export_experiments_to_dataframe(experiments) +print(experiments_df) print("Done!") diff --git 
a/tests/test_ena_objects.py b/tests/test_ena_objects.py index 2931c57..e1456c0 100644 --- a/tests/test_ena_objects.py +++ b/tests/test_ena_objects.py @@ -11,36 +11,40 @@ class TestEnaStudy: """Test class for Ena Study objects""" - def test_should_raise_key_error(self): - bad_dict = { - "title": "My Title", - "study_description": "Should 'description'", - "publications": None, - } + bad_dict = { + "title": "My Title", + "study_description": "Should 'description'", + "publications": None, + } + + test_study_dict = { + "alias": "https://datahub.elixir-belgium.org/studies/2", + "title": "ENA Upload Study", + "study_type": "", + "study_abstract": "", + "new_study_type": None, + "pubmed_id": [1], + } + def test_should_raise_key_error(self): with pytest.raises( KeyError, match="description was not found in the provided ISA JSON." ): - EnaStudy.from_isa_json(bad_dict) + EnaStudy.from_isa_json(self.bad_dict) def test_ena_study_creation(self): studies = EnaStudy.from_isa_json(test_isa_json) assert len(studies) == 1 - assert studies[0].__dict__() == dict( - { - "alias": "https://datahub.elixir-belgium.org/studies/2", - "title": "ENA Upload Study", - "study_type": "", - "study_abstract": "", - "new_study_type": None, - "pubmed_id": [1], - } - ) + assert studies[0].to_dict() == self.test_study_dict class TestEnaSample: - pass + study_dict = test_isa_json["studies"][0] + + def test_sample_creation(self): + samples = EnaSample.from_study_dict(self.study_dict) + assert len(samples) == 6 class TestEnaExperiment: From fa4f66da54980470e32a617c65c2fe47b4ab104b Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Thu, 28 Sep 2023 09:45:13 +0200 Subject: [PATCH 22/62] Implement exporting to dataframe --- ena_objects/characteristic.py | 136 ++++++++++--------- ena_objects/ena_experiment.py | 80 +++++++---- ena_objects/other_material_characteristic.py | 6 +- ena_objects/parameter_value.py | 6 +- example_read_isa_json.py | 4 +- 5 files changed, 135 insertions(+), 97 deletions(-) diff 
--git a/ena_objects/characteristic.py b/ena_objects/characteristic.py index f722bdd..e471266 100644 --- a/ena_objects/characteristic.py +++ b/ena_objects/characteristic.py @@ -5,15 +5,6 @@ from ena_objects.ena_std_lib import validate_dict -def fetch_category_name(categories: Dict, name: str) -> str: - for cat in categories: - if name["@id"] == cat["id"]: - if "name" in cat: - return cat["name"] - elif "value" in cat: - return cat["value"] - - class IsaBase: """ This is the base class @@ -24,72 +15,87 @@ def check_dict_keys(self, dict: Dict, mandatory_keys): [validate_dict(dict=dict, key=key) for key in mandatory_keys] -class Category(IsaBase): - """ - This represents a category object in a Characteristic - """ +# class Category(IsaBase): +# """ +# This represents a category object in a Characteristic +# """ - def __init__(self, id: str, name: str) -> None: - self.id = id - self.name = name +# def __init__(self, id: str, name: str) -> None: +# self.id = id +# self.name = name - mandatory_keys = ["@id"] +# mandatory_keys = ["@id"] - @classmethod - def from_dict(self, dict: Dict, categories: Dict): - super().check_dict_keys(dict, self.mandatory_keys) +# @classmethod +# def from_dict(self, dict: Dict, categories: Dict): +# super().check_dict_keys(dict, self.mandatory_keys) - return Category(id=dict["@id"], name=fetch_category_name(categories, dict)) +# return self(id=dict["@id"], name=fetch_category_name(categories, dict)) -class Value(IsaBase): - """ - This represents a Value object in a Characteristic - """ +# class Value(IsaBase): +# """ +# This represents a Value object in a Characteristic +# """ - mandatory_keys = ["annotationValue", "termSource", "termAccession"] +# mandatory_keys = ["annotationValue", "termSource", "termAccession"] - def __init__( - self, annotation_value: str, term_source: str = "", term_accession: str = "" - ) -> None: - self.annotation_value = annotation_value - self.term_source = term_source - self.term_accession = term_accession +# def 
__init__( +# self, annotation_value: str, term_source: str = "", term_accession: str = "" +# ) -> None: +# self.annotation_value = annotation_value +# self.term_source = term_source +# self.term_accession = term_accession - @classmethod - def from_dict(self, dict: Dict): - super().check_dict_keys(dict, self.mandatory_keys) +# @classmethod +# def from_dict(self, dict: Dict): +# super().check_dict_keys(dict, self.mandatory_keys) - return Value( - annotation_value=dict["annotationValue"], - term_accession=dict["termAccession"], - term_source=dict["termSource"], - ) +# return Value( +# annotation_value=dict["annotationValue"], +# term_accession=dict["termAccession"], +# term_source=dict["termSource"], +# ) -class Unit(IsaBase): - """ - This represents the Unit object in a Characteristic - """ +# class Unit(IsaBase): +# """ +# This represents the Unit object in a Characteristic +# """ - mandatory_keys = ["termSource", "termAccession", "comments"] +# mandatory_keys = ["termSource", "termAccession", "comments"] - def __init__( - self, term_source: str, term_accession: str, comments: List[str] - ) -> None: - self.term_source = term_source - self.term_accession = term_accession - self.comments = comments +# def __init__( +# self, term_source: str, term_accession: str, comments: List[str] +# ) -> None: +# self.term_source = term_source +# self.term_accession = term_accession +# self.comments = comments - @classmethod - def from_dict(self, dict: Dict): - super().check_dict_keys(dict, self.mandatory_keys) +# @classmethod +# def from_dict(self, dict: Dict): +# super().check_dict_keys(dict, self.mandatory_keys) - return Unit( - term_source=dict["termSource"], - term_accession=dict["termAccession"], - comments=dict["comments"], - ) +# return Unit( +# term_source=dict["termSource"], +# term_accession=dict["termAccession"], +# comments=dict["comments"], +# ) + + +def fetch_category_name(categories: Dict, name: str) -> str: + for cat in categories: + if name["@id"] == cat["id"]: + 
if "name" in cat: + return cat["name"] + elif "value" in cat: + return cat["value"] + + +def category_dict(dict: Dict, categories: Dict): + category_name = fetch_category_name(categories, dict) + category_id = dict["@id"] + return {"id": category_id, "name": category_name} class Characteristic(IsaBase): @@ -100,22 +106,20 @@ class Characteristic(IsaBase): mandatory_keys = ["category", "value", "unit"] parameters = [] - def __init__(self, category: Category, value: Value, unit: Unit) -> None: + def __init__(self, category: Dict, value: str) -> None: self.category = category self.value = value - self.unit = unit @classmethod def from_dict(self, dict: Dict, categories: Dict): super().check_dict_keys(dict, self.mandatory_keys) return self( - category=Category.from_dict(dict["category"], categories), - value=Value.from_dict(dict["value"]), - unit=Unit.from_dict(dict["unit"]), + category=category_dict(dict["category"], categories), + value=dict["value"]["annotationValue"], ) def to_dict(self) -> Dict: return { - "category": self.category.name, - "value": self.value.annotation_value, + "category": self.category, + "value": self.value, } diff --git a/ena_objects/ena_experiment.py b/ena_objects/ena_experiment.py index d6e0559..1a2e935 100644 --- a/ena_objects/ena_experiment.py +++ b/ena_objects/ena_experiment.py @@ -12,16 +12,38 @@ from ena_objects.other_material import OtherMaterial +def clip_off_prefix(alias: Union[str, List[str]]) -> Union[str, List[str]]: + if isinstance(alias, str): + result = re.split("/", alias)[-1] + elif isinstance(alias, list): + result = [] + for item in alias: + if isinstance(item, str): + result.append(re.split("/", item)[-1]) + else: + raise TypeError( + "The 'clip_off_prefix' function only accepts strings or a list of strings" + ) + else: + raise TypeError( + "The 'clip_off_prefix' function only accepts strings or a list of strings" + ) + return result + + def experiment_alias(other_material: OtherMaterial): - seek_assays_id: str = 
re.split("/", other_material.id)[1] + seek_assays_id: str = clip_off_prefix(other_material.id) return EnaExperiment.prefix + seek_assays_id def fetch_characteristic_categories(study_dict: Dict): - return [ - {"id": cc["@id"], "value": cc["characteristicType"]["annotationValue"]} - for cc in study_dict["characteristicCategories"] - ] + categories = [] + for assay in study_dict["assays"]: + for cc in assay["characteristicCategories"]: + categories.append( + {"id": cc["@id"], "value": cc["characteristicType"]["annotationValue"]} + ) + return categories def get_other_materials(study_dict: Dict) -> List[OtherMaterial]: @@ -42,7 +64,7 @@ def library_names(study_dict: Dict) -> List[str]: return [om["name"] for om in get_other_materials(study_dict)] -def sample_associations(assay_dict: Dict): +def get_sample_associations(assay_dict: Dict): process_sequence = [] for process in assay_dict["processSequence"]: input_ids = [input["@id"] for input in process["inputs"]] @@ -55,10 +77,13 @@ def sample_associations(assay_dict: Dict): def get_derived_sample_alias(other_material: OtherMaterial, study_dict: Dict) -> str: assoc_sample_ids = [] for assay in study_dict["assays"]: - for sa in sample_associations(assay): - if other_material.id in sa["output"]: + sample_associations = get_sample_associations(assay) + for sa in sample_associations: + if clip_off_prefix(other_material.id) in clip_off_prefix(sa["output"]): + # sa["output"] => '#sample/' + # other_material.id => '#other_material/' for input in sa["input"]: - alias = EnaSample.prefix + re.split("/", input)[-1] + alias = EnaSample.prefix + clip_off_prefix(input) assoc_sample_ids.append(alias) return assoc_sample_ids @@ -81,13 +106,13 @@ def get_parameter_values(study_dict: Dict) -> Dict: parameters = fetch_parameters(study_dict["protocols"]) for assay in study_dict["assays"]: for ps in assay["processSequence"]: - sample_id = re.split("/", ps["@id"])[-1] + sample_id = clip_off_prefix(ps["@id"]) parameter_values = [ 
ParameterValue.from_dict(parameter_value, parameters) for parameter_value in ps["parameterValues"] ] param_vals.append( - {"sample_id": sample_id, "paramter_values": parameter_values} + {"sample_id": sample_id, "parameter_values": parameter_values} ) return param_vals @@ -127,7 +152,7 @@ def to_dict(self) -> Dict: "study_alias": self.sample_alias, "sample_alias": self.sample_alias, "library_name": self.library_name, - "parameter_values": [pv for pv in self.parameter_values], + "parameter_values": [pv.to_dict() for pv in self.parameter_values], "other_material_characteristics": [ omc.to_dict() for omc in self.other_material_characteristics ], @@ -143,16 +168,24 @@ def from_study_dict(study_dict: Dict, study_alias): ena_experiments = [] for om in other_materials: - om_id = re.split("/", om.id)[-1] + om_id = clip_off_prefix(om.id) + s_alias = get_derived_sample_alias(om, study_dict) + filtered_parameter_vals = list( + filter(lambda pv: pv["sample_id"] == om_id, parameter_values) + ) + + parameter_vals = [] + for fpv in filtered_parameter_vals: + for pv in fpv["parameter_values"]: + parameter_vals.append(pv) + ena_experiments.append( EnaExperiment( alias=experiment_alias(om), library_name=om.name, study_alias=study_alias, - sample_alias=get_derived_sample_alias(om, study_dict), - parameter_values=list( - filter(lambda pv: pv["sample_id"] == om_id, parameter_values) - ), + sample_alias=s_alias, + parameter_values=parameter_vals, other_material_characteristics=om.other_material_characteristics, ) ) @@ -166,14 +199,15 @@ def export_experiments_to_dataframe(experiments: List[EnaExperiment]) -> DataFra other_material_characteristics = experiment_dict.pop( "other_material_characteristics" ) - # omc_dicts = [omc.to_dict() for omc in other_material_characteristics] parameter_values = experiment_dict.pop("parameter_values") - # pv_dicts = [pv.to_dict() for pv in parameter_values] - for omc in omc_dicts: - experiment_dict.update({omc["category"]: omc["value"]}) - for pv in 
pv_dicts: - experiment_dict.update({pv["category"]: pv["value"]}) + for omc in other_material_characteristics: + experiment_dict.update({omc["category"]["name"]: omc["value"]}) + + for pv in parameter_values: + experiment_dict.update({pv["category"]["name"]: pv["value"]}) + flat_dicts.append(experiment_dict) + return DataFrame.from_dict(flat_dicts) diff --git a/ena_objects/other_material_characteristic.py b/ena_objects/other_material_characteristic.py index e101c5b..88c03da 100644 --- a/ena_objects/other_material_characteristic.py +++ b/ena_objects/other_material_characteristic.py @@ -1,5 +1,5 @@ from typing import Dict -from ena_objects.characteristic import Category, Characteristic, Unit, Value +from ena_objects.characteristic import Characteristic class OtherMaterialCharacteristic(Characteristic): @@ -7,8 +7,8 @@ class OtherMaterialCharacteristic(Characteristic): This class represents the other material object. """ - def __init__(self, category: Category, value: Value, unit: Unit) -> None: - super().__init__(category, value, unit) + def __init__(self, category: Dict, value: str) -> None: + super().__init__(category, value) @classmethod def from_dict(cls, dict: Dict, characteristics_categories: Dict): diff --git a/ena_objects/parameter_value.py b/ena_objects/parameter_value.py index 326464d..b85f250 100644 --- a/ena_objects/parameter_value.py +++ b/ena_objects/parameter_value.py @@ -1,5 +1,5 @@ from typing import Dict -from ena_objects.characteristic import Category, Characteristic, Unit, Value +from ena_objects.characteristic import Characteristic class ParameterValue(Characteristic): @@ -8,8 +8,8 @@ class ParameterValue(Characteristic): and is inherited from the Characteristic class """ - def __init__(self, category: Category, value: Value, unit: Unit) -> None: - super().__init__(category, value, unit) + def __init__(self, category: Dict, value: str) -> None: + super().__init__(category, value) @classmethod def from_dict(self, dict: Dict, parameters: Dict): 
diff --git a/example_read_isa_json.py b/example_read_isa_json.py index 9c271c2..1d3b390 100644 --- a/example_read_isa_json.py +++ b/example_read_isa_json.py @@ -12,14 +12,14 @@ studies = EnaStudy.from_isa_json(isa_json) study_dfs = [study.to_dataframe() for study in studies] print(study_dfs[0]) - +study = studies[0] study_dict = isa_json["studies"][0] samples = EnaSample.from_study_dict(study_dict) samples_df = export_samples_to_dataframe(samples) print(samples_df) -experiments = EnaExperiment.from_study_dict(study_dict, studies[0].alias) +experiments = EnaExperiment.from_study_dict(study_dict, study.alias) experiments_df = export_experiments_to_dataframe(experiments) print(experiments_df) print("Done!") From 479c95a70d35fccb8ce3e91efcabd9cedcd80fe8 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Thu, 28 Sep 2023 09:48:13 +0200 Subject: [PATCH 23/62] Change alias prefix of experiments to the samples url. Makes more sense? --- ena_objects/ena_experiment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ena_objects/ena_experiment.py b/ena_objects/ena_experiment.py index 1a2e935..9b4f8b0 100644 --- a/ena_objects/ena_experiment.py +++ b/ena_objects/ena_experiment.py @@ -128,7 +128,7 @@ class EnaExperiment(IsaBase): "processSequence", "assays", ] - prefix = "https://datahub.elixir-belgium.org/assays/" # TODO: Replace by something less hard-coded + prefix = "https://datahub.elixir-belgium.org/samples/" # TODO: Replace by something less hard-coded def __init__( self, From 16452482de9733f0743c465ce5eaf02381e4bb06 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Thu, 28 Sep 2023 10:33:46 +0200 Subject: [PATCH 24/62] Get sample alias out of the list --- ena_objects/ena_experiment.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/ena_objects/ena_experiment.py b/ena_objects/ena_experiment.py index 9b4f8b0..4be092e 100644 --- a/ena_objects/ena_experiment.py +++ b/ena_objects/ena_experiment.py @@ 
-1,10 +1,9 @@ import re -from typing import List, Dict, Union, Optional +from typing import List, Dict, Union from pandas import DataFrame -from ena_objects import other_material_characteristic -from ena_objects.ena_std_lib import filter_attribute_by, validate_dict +from ena_objects.ena_std_lib import validate_dict from ena_objects.characteristic import IsaBase from ena_objects.ena_sample import EnaSample from ena_objects.other_material_characteristic import OtherMaterialCharacteristic @@ -74,7 +73,9 @@ def get_sample_associations(assay_dict: Dict): return process_sequence -def get_derived_sample_alias(other_material: OtherMaterial, study_dict: Dict) -> str: +def get_derived_sample_alias( + other_material: OtherMaterial, study_dict: Dict, return_multiple: bool = False +) -> str: assoc_sample_ids = [] for assay in study_dict["assays"]: sample_associations = get_sample_associations(assay) @@ -82,9 +83,13 @@ def get_derived_sample_alias(other_material: OtherMaterial, study_dict: Dict) -> if clip_off_prefix(other_material.id) in clip_off_prefix(sa["output"]): # sa["output"] => '#sample/' # other_material.id => '#other_material/' - for input in sa["input"]: - alias = EnaSample.prefix + clip_off_prefix(input) - assoc_sample_ids.append(alias) + if return_multiple: + for input in sa["input"]: + alias = EnaSample.prefix + clip_off_prefix(input) + assoc_sample_ids.append(alias) + else: + input = sa["input"][0] + return EnaSample.prefix + clip_off_prefix(input) return assoc_sample_ids @@ -158,8 +163,10 @@ def to_dict(self) -> Dict: ], } - def from_study_dict(study_dict: Dict, study_alias): - [validate_dict(study_dict, key) for key in EnaExperiment.mandatory_keys] + @classmethod + def from_study_dict(self, study_dict: Dict, study_alias: str): + super().check_dict_keys(study_dict, self.mandatory_keys) + # [validate_dict(study_dict, key) for key in EnaExperiment.mandatory_keys] other_materials = get_other_materials(study_dict) From f19d07dccf9e39a7bd9addea48068de1bd532740 Mon 
Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Thu, 28 Sep 2023 20:10:04 +0200 Subject: [PATCH 25/62] Implementation of Ena Runs --- ena_objects/ena_experiment.py | 14 +--- ena_objects/ena_run.py | 131 ++++++++++++++++++++++++++++++++-- ena_objects/ena_sample.py | 3 +- ena_objects/ena_std_lib.py | 10 +++ ena_objects/ena_study.py | 8 ++- example_read_isa_json.py | 5 ++ 6 files changed, 149 insertions(+), 22 deletions(-) diff --git a/ena_objects/ena_experiment.py b/ena_objects/ena_experiment.py index 4be092e..8b0ce8f 100644 --- a/ena_objects/ena_experiment.py +++ b/ena_objects/ena_experiment.py @@ -3,7 +3,7 @@ from pandas import DataFrame -from ena_objects.ena_std_lib import validate_dict +from ena_objects.ena_std_lib import get_assay_sample_associations from ena_objects.characteristic import IsaBase from ena_objects.ena_sample import EnaSample from ena_objects.other_material_characteristic import OtherMaterialCharacteristic @@ -63,22 +63,12 @@ def library_names(study_dict: Dict) -> List[str]: return [om["name"] for om in get_other_materials(study_dict)] -def get_sample_associations(assay_dict: Dict): - process_sequence = [] - for process in assay_dict["processSequence"]: - input_ids = [input["@id"] for input in process["inputs"]] - output_ids = [output["@id"] for output in process["outputs"]] - process_sequence.append({"input": input_ids, "output": output_ids}) - - return process_sequence - - def get_derived_sample_alias( other_material: OtherMaterial, study_dict: Dict, return_multiple: bool = False ) -> str: assoc_sample_ids = [] for assay in study_dict["assays"]: - sample_associations = get_sample_associations(assay) + sample_associations = get_assay_sample_associations(assay) for sa in sample_associations: if clip_off_prefix(other_material.id) in clip_off_prefix(sa["output"]): # sa["output"] => '#sample/' diff --git a/ena_objects/ena_run.py b/ena_objects/ena_run.py index 5385d68..120549a 100644 --- a/ena_objects/ena_run.py +++ b/ena_objects/ena_run.py @@ 
-1,19 +1,138 @@ from typing import List, Dict +from pandas import DataFrame -class EnaRun: +from ena_objects.characteristic import IsaBase +from ena_objects.ena_experiment import clip_off_prefix +from ena_objects.ena_std_lib import get_assay_sample_associations + + +class DataFileComment(IsaBase): + mandatory_keys = ["name", "value"] + + def __init__(self, name: str, value: str) -> None: + super().__init__() + self.name = name + self.value = value + + @classmethod + def from_dict(self, comments_dict) -> None: + for comment in comments_dict: + super().check_dict_keys(dict=comment, mandatory_keys=self.mandatory_keys) + + return [ + DataFileComment(name=comment["name"], value=comment["value"]) + for comment in comments_dict + ] + + def to_dict(self) -> Dict: + return {"name": self.name, "value": self.value} + + +class DataFile(IsaBase): + mandatory_keys = ["@id", "name", "type", "comments"] + + def __init__(self, id, name, type, comments, derived_experiment_id) -> None: + super().__init__() + self.id: str = id + self.name: str = name + self.type: str = type + self.comments: List[DataFileComment] = comments + self.derived_experiment_id: str = derived_experiment_id + + @classmethod + def from_data_file_dict(self, data_file_dict: Dict, associations: Dict) -> None: + super().check_dict_keys(data_file_dict, self.mandatory_keys) + return DataFile( + id=data_file_dict["@id"], + name=data_file_dict["name"], + type=data_file_dict["type"], + comments=DataFileComment.from_dict(data_file_dict["comments"]), + derived_experiment_id=get_derived_expertiment_id( + associations, clip_off_prefix(data_file_dict["@id"]) + ), + ) + + def to_dict(self) -> Dict: + return { + "id": self.id, + "name": self.name, + "type": self.type, + "comments": [comment.to_dict() for comment in self.comments], + "derived_experiment_id": self.derived_experiment_id, + } + + +def fetch_run_alias(data_file: Dict): + return EnaRun.prefix + clip_off_prefix(data_file["@id"]) + + +def 
get_derived_expertiment_id(associations: List[Dict], data_file_id: str): + for association in associations: + if data_file_id in clip_off_prefix(association["output"]): + return association["input"][0] + + +def fetch_experiment_alias(data_file: DataFile) -> str: + return EnaRun.prefix + clip_off_prefix(data_file.derived_experiment_id) + + +class EnaRun(IsaBase): """ Generates a Run object, compliant to the requirements of ENA """ + mandatory_keys = ["dataFiles", "processSequence"] + prefix = "https://datahub.elixir-belgium.org/samples/" # TODO: Replace by something less hard-coded + def __init__( self, alias: str, experiment_alias: str, - filename: str, - file_type: str, + data_file: DataFile, ) -> None: + super().__init__() self.alias = alias - self.experiments = experiment_alias - self.filename = filename - self.file_type = file_type + self.experiment_alias = experiment_alias + self.data_file = data_file + + @classmethod + def from_study_dict(self, study_dict: Dict) -> None: + ena_runs = [] + + for assay in study_dict["assays"]: + super().check_dict_keys(assay, self.mandatory_keys) + sample_datafile_associations = get_assay_sample_associations(assay) + for data_file in assay["dataFiles"]: + current_data_file = DataFile.from_data_file_dict( + data_file, sample_datafile_associations + ) + ena_runs.append( + EnaRun( + alias=fetch_run_alias(data_file), + experiment_alias=fetch_experiment_alias(current_data_file), + data_file=current_data_file, + ) + ) + + return ena_runs + + def to_dict(self) -> Dict: + return { + "alias": self.alias, + "experiment_alias": self.experiment_alias, + "data_file": self.data_file.to_dict(), + } + + +def export_runs_to_dataframe(runs: List[EnaRun]) -> DataFrame: + ena_run_dicts = [run.to_dict() for run in runs] + flat_dicts = [] + for dict in ena_run_dicts: + data_file = dict.pop("data_file") + data_file_comments = data_file.pop("comments") + dict.update({"file_name": data_file["name"]}) + for dfc in data_file_comments: + 
dict.update({dfc["name"]: dfc["value"]}) + flat_dicts.append(dict) + return DataFrame.from_dict(flat_dicts) diff --git a/ena_objects/ena_sample.py b/ena_objects/ena_sample.py index bbceb66..6448b4c 100644 --- a/ena_objects/ena_sample.py +++ b/ena_objects/ena_sample.py @@ -111,7 +111,8 @@ def to_dict(self) -> Dict: "characteristics": self.characteristics, } - def from_study_dict(study_dict: Dict) -> None: + @classmethod + def from_study_dict(self, study_dict: Dict) -> None: """Generate sample objects from a study dictionary Args: diff --git a/ena_objects/ena_std_lib.py b/ena_objects/ena_std_lib.py index 970ade7..8b30d97 100644 --- a/ena_objects/ena_std_lib.py +++ b/ena_objects/ena_std_lib.py @@ -38,3 +38,13 @@ def validate_dict(dict: Dict, key: str) -> None: """ if key not in dict.keys(): raise KeyError(f"{key} was not found in the provided ISA JSON.") + + +def get_assay_sample_associations(assay_dict: Dict): + process_sequence = [] + for process in assay_dict["processSequence"]: + input_ids = [input["@id"] for input in process["inputs"]] + output_ids = [output["@id"] for output in process["outputs"]] + process_sequence.append({"input": input_ids, "output": output_ids}) + + return process_sequence diff --git a/ena_objects/ena_study.py b/ena_objects/ena_study.py index f64e6d1..57a15bc 100644 --- a/ena_objects/ena_study.py +++ b/ena_objects/ena_study.py @@ -1,5 +1,6 @@ from typing import List, Optional, Dict from pandas import DataFrame +from ena_objects.characteristic import IsaBase from ena_objects.ena_experiment import EnaExperiment from ena_objects.ena_run import EnaRun from ena_objects.ena_sample import EnaSample @@ -33,7 +34,7 @@ def study_alias(study_isa_json: str) -> str: return EnaStudy.prefix + seek_study_id -class EnaStudy: +class EnaStudy(IsaBase): """Generates a Study object, compliant to the requirements of ENA""" mandatory_keys = ["title", "description", "publications"] @@ -73,7 +74,8 @@ def to_dict(self): "pubmed_id": self.pubmed_id, } - def 
from_isa_json(isa_json: Dict): + @classmethod + def from_isa_json(self, isa_json: Dict): """Method that creates an EnaStudy with params from ISA JSON Dictionary Args: @@ -82,7 +84,7 @@ def from_isa_json(isa_json: Dict): Returns: EnaStudy: EnaStudy object """ - [validate_dict(isa_json, key) for key in EnaStudy.mandatory_keys] + super().check_dict_keys(isa_json, self.mandatory_keys) ena_studies = [] diff --git a/example_read_isa_json.py b/example_read_isa_json.py index 1d3b390..8e14fc6 100644 --- a/example_read_isa_json.py +++ b/example_read_isa_json.py @@ -1,5 +1,6 @@ import json from ena_objects.ena_experiment import export_experiments_to_dataframe +from ena_objects.ena_run import EnaRun, export_runs_to_dataframe from ena_objects.ena_sample import export_samples_to_dataframe @@ -22,4 +23,8 @@ experiments = EnaExperiment.from_study_dict(study_dict, study.alias) experiments_df = export_experiments_to_dataframe(experiments) print(experiments_df) + +runs = EnaRun.from_study_dict(study_dict) +runs_df = export_runs_to_dataframe(runs) +print(runs_df) print("Done!") From 2baec9423a6d5193dbaf713586a0c41969f6ccef Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Thu, 28 Sep 2023 21:27:18 +0200 Subject: [PATCH 26/62] Implement Submission --- ena_objects/ena_study.py | 9 +++---- ena_objects/ena_submission.py | 36 ++++++++++++++++++++++++++-- example_read_isa_json.py | 44 ++++++++++++++++++++++++----------- 3 files changed, 69 insertions(+), 20 deletions(-) diff --git a/ena_objects/ena_study.py b/ena_objects/ena_study.py index 57a15bc..a626982 100644 --- a/ena_objects/ena_study.py +++ b/ena_objects/ena_study.py @@ -89,8 +89,6 @@ def from_isa_json(self, isa_json: Dict): ena_studies = [] for study in isa_json["studies"]: - ena_samples = EnaSample.from_study_dict(study) - ena_studies.append( EnaStudy( alias=study_alias(study), @@ -98,8 +96,11 @@ def from_isa_json(self, isa_json: Dict): study_type="", # TODO: Replace by Custom metadata of the Assay level 
study_abstract=study["description"], new_study_type=None, - samples=ena_samples, - experiments=EnaExperiment, + samples=EnaSample.from_study_dict(study), + experiments=EnaExperiment.from_study_dict( + study, study_alias(study) + ), + runs=EnaRun.from_study_dict(study), pubmed_id=study_publication_ids( publication_isa_json=study["publications"] ), diff --git a/ena_objects/ena_submission.py b/ena_objects/ena_submission.py index c8abf60..c792ca1 100644 --- a/ena_objects/ena_submission.py +++ b/ena_objects/ena_submission.py @@ -1,8 +1,21 @@ from typing import List, Dict +from pandas import DataFrame +import pandas +from ena_objects.ena_experiment import export_experiments_to_dataframe +from ena_objects.ena_run import export_runs_to_dataframe +from ena_objects.ena_sample import export_samples_to_dataframe + from ena_objects.ena_study import EnaStudy +def merge_df_by_key( + dataframe_dict_list: List[Dict[str, DataFrame]], key: str +) -> Dict[str, DataFrame]: + filtered_list = list(map(lambda d: d[key], dataframe_dict_list)) + return pandas.concat(filtered_list) + + class EnaSubmission: """ Generates a Submission object, compliant to the requirements of ENA @@ -19,5 +32,24 @@ def from_isa_json(isa_json: Dict) -> None: studies=EnaStudy.from_isa_json(isa_json), ) - def generate_dataframes(): - pass + def generate_dataframes(self) -> Dict[str, DataFrame]: + dataframes = [] + for study in self.studies: + study_df = EnaStudy.to_dataframe(study) + samples_df = export_samples_to_dataframe(study.samples) + experiments_df = export_experiments_to_dataframe(study.experiments) + runs_df = export_runs_to_dataframe(study.runs) + dataframes.append( + { + "study_df": study_df, + "samples_df": samples_df, + "experiments_df": experiments_df, + "runs_df": runs_df, + } + ) + return { + "study": merge_df_by_key(dataframes, "study_df"), + "samples": merge_df_by_key(dataframes, "samples_df"), + "experiments": merge_df_by_key(dataframes, "experiments_df"), + "runs": merge_df_by_key(dataframes, 
"runs_df"), + } diff --git a/example_read_isa_json.py b/example_read_isa_json.py index 8e14fc6..00c0bdf 100644 --- a/example_read_isa_json.py +++ b/example_read_isa_json.py @@ -1,30 +1,46 @@ import json +import os from ena_objects.ena_experiment import export_experiments_to_dataframe from ena_objects.ena_run import EnaRun, export_runs_to_dataframe from ena_objects.ena_sample import export_samples_to_dataframe from ena_objects.ena_study import EnaStudy, EnaSample, EnaExperiment +from ena_objects.ena_submission import EnaSubmission # Read json file isa_json_file = open("tests/test_data/isa_json_test_investigation.json") isa_json = json.load(isa_json_file) -studies = EnaStudy.from_isa_json(isa_json) -study_dfs = [study.to_dataframe() for study in studies] -print(study_dfs[0]) -study = studies[0] -study_dict = isa_json["studies"][0] +# studies = EnaStudy.from_isa_json(isa_json) +# study_dfs = [study.to_dataframe() for study in studies] +# print(study_dfs[0]) +# study = studies[0] +# study_dict = isa_json["studies"][0] -samples = EnaSample.from_study_dict(study_dict) -samples_df = export_samples_to_dataframe(samples) -print(samples_df) +# samples = EnaSample.from_study_dict(study_dict) +# samples_df = export_samples_to_dataframe(samples) +# print(samples_df) + +# experiments = EnaExperiment.from_study_dict(study_dict, study.alias) +# experiments_df = export_experiments_to_dataframe(experiments) +# print(experiments_df) + +# runs = EnaRun.from_study_dict(study_dict) +# runs_df = export_runs_to_dataframe(runs) +# print(runs_df) + +outputfolder = "./output_folder/" + +if not os.path.exists(outputfolder): + os.makedirs(outputfolder) + +submission = EnaSubmission.from_isa_json(isa_json) +submission_dfs = submission.generate_dataframes() +for k, df in submission_dfs.items(): + print(f"Dataframe {k}:") + print(df) + df.to_excel(f"{outputfolder}{k}.xlsx") -experiments = EnaExperiment.from_study_dict(study_dict, study.alias) -experiments_df = 
export_experiments_to_dataframe(experiments) -print(experiments_df) -runs = EnaRun.from_study_dict(study_dict) -runs_df = export_runs_to_dataframe(runs) -print(runs_df) print("Done!") From 3426dcf692d6ee37b8f055deb771c90d90dd1720 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 29 Sep 2023 07:19:30 +0200 Subject: [PATCH 27/62] Cleaning up classes --- ena_objects/characteristic.py | 101 ++++++------------- ena_objects/ena_experiment.py | 11 +- ena_objects/other_material.py | 8 +- ena_objects/other_material_characteristic.py | 18 ---- ena_objects/parameter_value.py | 19 ---- 5 files changed, 43 insertions(+), 114 deletions(-) delete mode 100644 ena_objects/other_material_characteristic.py delete mode 100644 ena_objects/parameter_value.py diff --git a/ena_objects/characteristic.py b/ena_objects/characteristic.py index e471266..5c351d1 100644 --- a/ena_objects/characteristic.py +++ b/ena_objects/characteristic.py @@ -15,74 +15,6 @@ def check_dict_keys(self, dict: Dict, mandatory_keys): [validate_dict(dict=dict, key=key) for key in mandatory_keys] -# class Category(IsaBase): -# """ -# This represents a category object in a Characteristic -# """ - -# def __init__(self, id: str, name: str) -> None: -# self.id = id -# self.name = name - -# mandatory_keys = ["@id"] - -# @classmethod -# def from_dict(self, dict: Dict, categories: Dict): -# super().check_dict_keys(dict, self.mandatory_keys) - -# return self(id=dict["@id"], name=fetch_category_name(categories, dict)) - - -# class Value(IsaBase): -# """ -# This represents a Value object in a Characteristic -# """ - -# mandatory_keys = ["annotationValue", "termSource", "termAccession"] - -# def __init__( -# self, annotation_value: str, term_source: str = "", term_accession: str = "" -# ) -> None: -# self.annotation_value = annotation_value -# self.term_source = term_source -# self.term_accession = term_accession - -# @classmethod -# def from_dict(self, dict: Dict): -# super().check_dict_keys(dict, 
self.mandatory_keys) - -# return Value( -# annotation_value=dict["annotationValue"], -# term_accession=dict["termAccession"], -# term_source=dict["termSource"], -# ) - - -# class Unit(IsaBase): -# """ -# This represents the Unit object in a Characteristic -# """ - -# mandatory_keys = ["termSource", "termAccession", "comments"] - -# def __init__( -# self, term_source: str, term_accession: str, comments: List[str] -# ) -> None: -# self.term_source = term_source -# self.term_accession = term_accession -# self.comments = comments - -# @classmethod -# def from_dict(self, dict: Dict): -# super().check_dict_keys(dict, self.mandatory_keys) - -# return Unit( -# term_source=dict["termSource"], -# term_accession=dict["termAccession"], -# comments=dict["comments"], -# ) - - def fetch_category_name(categories: Dict, name: str) -> str: for cat in categories: if name["@id"] == cat["id"]: @@ -123,3 +55,36 @@ def to_dict(self) -> Dict: "category": self.category, "value": self.value, } + + +class OtherMaterialCharacteristic(Characteristic): + """ + This class represents the other material object. 
+ """ + + def __init__(self, category: Dict, value: str) -> None: + super().__init__(category, value) + + @classmethod + def from_dict(cls, dict: Dict, characteristics_categories: Dict): + return super().from_dict(dict, characteristics_categories) + + def to_dict(self) -> Dict: + return super().to_dict() + + +class ParameterValue(Characteristic): + """ + This class represents a paramenter value in the isa study + and is inherited from the Characteristic class + """ + + def __init__(self, category: Dict, value: str) -> None: + super().__init__(category, value) + + @classmethod + def from_dict(self, dict: Dict, parameters: Dict): + return super().from_dict(dict, parameters) + + def to_dict(self) -> Dict: + return super().to_dict() diff --git a/ena_objects/ena_experiment.py b/ena_objects/ena_experiment.py index 8b0ce8f..c206038 100644 --- a/ena_objects/ena_experiment.py +++ b/ena_objects/ena_experiment.py @@ -4,10 +4,12 @@ from pandas import DataFrame from ena_objects.ena_std_lib import get_assay_sample_associations -from ena_objects.characteristic import IsaBase +from ena_objects.characteristic import ( + IsaBase, + OtherMaterialCharacteristic, + ParameterValue, +) from ena_objects.ena_sample import EnaSample -from ena_objects.other_material_characteristic import OtherMaterialCharacteristic -from ena_objects.parameter_value import ParameterValue from ena_objects.other_material import OtherMaterial @@ -156,11 +158,8 @@ def to_dict(self) -> Dict: @classmethod def from_study_dict(self, study_dict: Dict, study_alias: str): super().check_dict_keys(study_dict, self.mandatory_keys) - # [validate_dict(study_dict, key) for key in EnaExperiment.mandatory_keys] other_materials = get_other_materials(study_dict) - - # protocol_parameters = protocol_parameters(study_dict["protocols"]) parameter_values = get_parameter_values(study_dict) ena_experiments = [] diff --git a/ena_objects/other_material.py b/ena_objects/other_material.py index a50fe41..e295271 100644 --- 
a/ena_objects/other_material.py +++ b/ena_objects/other_material.py @@ -1,7 +1,9 @@ from ena_objects.ena_std_lib import validate_dict -from ena_objects.parameter_value import ParameterValue -from ena_objects.other_material_characteristic import OtherMaterialCharacteristic -from ena_objects.characteristic import IsaBase +from ena_objects.characteristic import ( + IsaBase, + ParameterValue, + OtherMaterialCharacteristic, +) from typing import List, Dict diff --git a/ena_objects/other_material_characteristic.py b/ena_objects/other_material_characteristic.py deleted file mode 100644 index 88c03da..0000000 --- a/ena_objects/other_material_characteristic.py +++ /dev/null @@ -1,18 +0,0 @@ -from typing import Dict -from ena_objects.characteristic import Characteristic - - -class OtherMaterialCharacteristic(Characteristic): - """ - This class represents the other material object. - """ - - def __init__(self, category: Dict, value: str) -> None: - super().__init__(category, value) - - @classmethod - def from_dict(cls, dict: Dict, characteristics_categories: Dict): - return super().from_dict(dict, characteristics_categories) - - def to_dict(self) -> Dict: - return super().to_dict() diff --git a/ena_objects/parameter_value.py b/ena_objects/parameter_value.py deleted file mode 100644 index b85f250..0000000 --- a/ena_objects/parameter_value.py +++ /dev/null @@ -1,19 +0,0 @@ -from typing import Dict -from ena_objects.characteristic import Characteristic - - -class ParameterValue(Characteristic): - """ - This class represents a paramenter value in the isa study - and is inherited from the Characteristic class - """ - - def __init__(self, category: Dict, value: str) -> None: - super().__init__(category, value) - - @classmethod - def from_dict(self, dict: Dict, parameters: Dict): - return super().from_dict(dict, parameters) - - def to_dict(self) -> Dict: - return super().to_dict() From 3a4b4effc78fdb47b99287ae1d7b384577644493 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: 
Fri, 29 Sep 2023 07:55:34 +0200 Subject: [PATCH 28/62] Implement Characteristic class for Ena Sample --- ena_objects/characteristic.py | 36 +++++++++++++++++++++++++---------- ena_objects/ena_sample.py | 31 +++++++++++++++--------------- 2 files changed, 41 insertions(+), 26 deletions(-) diff --git a/ena_objects/characteristic.py b/ena_objects/characteristic.py index 5c351d1..5671da4 100644 --- a/ena_objects/characteristic.py +++ b/ena_objects/characteristic.py @@ -11,11 +11,11 @@ class IsaBase: """ @classmethod - def check_dict_keys(self, dict: Dict, mandatory_keys): + def check_dict_keys(self, dict: Dict[str, str], mandatory_keys): [validate_dict(dict=dict, key=key) for key in mandatory_keys] -def fetch_category_name(categories: Dict, name: str) -> str: +def fetch_category_name(categories: Dict[str, str], name: str) -> str: for cat in categories: if name["@id"] == cat["id"]: if "name" in cat: @@ -24,7 +24,7 @@ def fetch_category_name(categories: Dict, name: str) -> str: return cat["value"] -def category_dict(dict: Dict, categories: Dict): +def category_dict(dict: Dict[str, str], categories: Dict[str, str]): category_name = fetch_category_name(categories, dict) category_id = dict["@id"] return {"id": category_id, "name": category_name} @@ -43,14 +43,14 @@ def __init__(self, category: Dict, value: str) -> None: self.value = value @classmethod - def from_dict(self, dict: Dict, categories: Dict): + def from_dict(self, dict: Dict[str, str], categories: Dict[str, str]): super().check_dict_keys(dict, self.mandatory_keys) return self( category=category_dict(dict["category"], categories), value=dict["value"]["annotationValue"], ) - def to_dict(self) -> Dict: + def to_dict(self) -> Dict[str, str]: return { "category": self.category, "value": self.value, @@ -66,10 +66,12 @@ def __init__(self, category: Dict, value: str) -> None: super().__init__(category, value) @classmethod - def from_dict(cls, dict: Dict, characteristics_categories: Dict): + def from_dict( + cls, dict: 
Dict[str, str], characteristics_categories: Dict[str, str] + ): return super().from_dict(dict, characteristics_categories) - def to_dict(self) -> Dict: + def to_dict(self) -> Dict[str, str]: return super().to_dict() @@ -79,12 +81,26 @@ class ParameterValue(Characteristic): and is inherited from the Characteristic class """ - def __init__(self, category: Dict, value: str) -> None: + def __init__(self, category: Dict[str, str], value: str) -> None: super().__init__(category, value) @classmethod - def from_dict(self, dict: Dict, parameters: Dict): + def from_dict(self, dict: Dict[str, str], parameters: Dict[str, str]): return super().from_dict(dict, parameters) - def to_dict(self) -> Dict: + def to_dict(self) -> Dict[str, str]: + return super().to_dict() + + +class SampleCharacteristic(Characteristic): + def __init__(self, category: Dict, value: str) -> None: + super().__init__(category, value) + + @classmethod + def from_dict( + self, dict: Dict[str, str], characteristics_categories: Dict[str, str] + ): + return super().from_dict(dict, characteristics_categories) + + def to_dict(self) -> Dict[str, str]: return super().to_dict() diff --git a/ena_objects/ena_sample.py b/ena_objects/ena_sample.py index 6448b4c..a9acfda 100644 --- a/ena_objects/ena_sample.py +++ b/ena_objects/ena_sample.py @@ -1,10 +1,11 @@ import re from typing import List, Dict +from ena_objects.characteristic import SampleCharacteristic from pandas import DataFrame -def study_characteristic_category_name(study_dict: Dict, id: str) -> Dict: +def fetch_characteristic_categories(study_dict: Dict) -> Dict: """Retrieves the name of a characteristic id Args: @@ -14,15 +15,11 @@ def study_characteristic_category_name(study_dict: Dict, id: str) -> Dict: Returns: Dict: characteristic name corresponding with the ID """ - char_cat_dicts = [ + return [ {"id": cc["@id"], "name": cc["characteristicType"]["annotationValue"]} for cc in study_dict["characteristicCategories"] ] - for ccd in char_cat_dicts: - if 
ccd["id"] == id: - return ccd["name"] - def fetch_characteristics(sample_dict: Dict, study_dict: Dict) -> List[Dict]: """Fetches the characteristics from the given sample dictionary @@ -34,14 +31,16 @@ def fetch_characteristics(sample_dict: Dict, study_dict: Dict) -> List[Dict]: Returns: List[Dict]: List of characteristic dictionaries """ + characteristic_categories = fetch_characteristic_categories(study_dict) return [ - { - "category_id": char["category"]["@id"], - "category_name": study_characteristic_category_name( - study_dict, char["category"]["@id"] - ), - "value": char["value"]["annotationValue"], - } + SampleCharacteristic.from_dict(char, characteristic_categories) + # { + # "category_id": char["category"]["@id"], + # "category_name": study_characteristic_category_name( + # study_dict, char["category"]["@id"] + # ), + # "value": char["value"]["annotationValue"], + # } for char in sample_dict["characteristics"] ] @@ -101,14 +100,14 @@ class EnaSample: prefix: str = "https://datahub.elixir-belgium.org/samples/" # TODO: Replace by something less hard-coded - def __init__(self, characteristics: Dict, alias: str) -> None: + def __init__(self, characteristics: List[SampleCharacteristic], alias: str) -> None: self.alias = alias self.characteristics = characteristics def to_dict(self) -> Dict: return { "alias": self.alias, - "characteristics": self.characteristics, + "characteristics": [char.to_dict() for char in self.characteristics], } @classmethod @@ -167,7 +166,7 @@ def export_samples_to_dataframe(samples: List[EnaSample]): sample_dict = sample.to_dict() characteristics = sample_dict.pop("characteristics") for char in characteristics: - sample_dict.update({char["category_name"]: char["value"]}) + sample_dict.update({char["category"]["name"]: char["value"]}) flat_dicts.append(sample_dict) return DataFrame.from_dict(flat_dicts) From 6e8c400fbdc10090ecc87acdc4628607beead17b Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 29 Sep 2023 08:00:27 +0200 
Subject: [PATCH 29/62] Clean up example script --- example_read_isa_json.py | 37 ++++++++----------------------------- 1 file changed, 8 insertions(+), 29 deletions(-) diff --git a/example_read_isa_json.py b/example_read_isa_json.py index 00c0bdf..fc30d1b 100644 --- a/example_read_isa_json.py +++ b/example_read_isa_json.py @@ -1,46 +1,25 @@ import json import os -from ena_objects.ena_experiment import export_experiments_to_dataframe -from ena_objects.ena_run import EnaRun, export_runs_to_dataframe - -from ena_objects.ena_sample import export_samples_to_dataframe - -from ena_objects.ena_study import EnaStudy, EnaSample, EnaExperiment from ena_objects.ena_submission import EnaSubmission # Read json file isa_json_file = open("tests/test_data/isa_json_test_investigation.json") isa_json = json.load(isa_json_file) -# studies = EnaStudy.from_isa_json(isa_json) -# study_dfs = [study.to_dataframe() for study in studies] -# print(study_dfs[0]) -# study = studies[0] -# study_dict = isa_json["studies"][0] - -# samples = EnaSample.from_study_dict(study_dict) -# samples_df = export_samples_to_dataframe(samples) -# print(samples_df) - -# experiments = EnaExperiment.from_study_dict(study_dict, study.alias) -# experiments_df = export_experiments_to_dataframe(experiments) -# print(experiments_df) - -# runs = EnaRun.from_study_dict(study_dict) -# runs_df = export_runs_to_dataframe(runs) -# print(runs_df) - +# Change this to 'True' if you want to export the resulting DataFrames to an xlsx. 
+export_to_excel = False outputfolder = "./output_folder/" -if not os.path.exists(outputfolder): - os.makedirs(outputfolder) - submission = EnaSubmission.from_isa_json(isa_json) submission_dfs = submission.generate_dataframes() + +if (not os.path.exists(outputfolder)) and export_to_excel: + os.makedirs(outputfolder) + for k, df in submission_dfs.items(): print(f"Dataframe {k}:") print(df) - df.to_excel(f"{outputfolder}{k}.xlsx") - + if export_to_excel: + df.to_excel(f"{outputfolder}{k}.xlsx") print("Done!") From 766246ad0e8864c44dd3c61bd6bb9e1cd6e5fef9 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 29 Sep 2023 08:33:36 +0200 Subject: [PATCH 30/62] Move clip_off_prefix to the common ena_std_lib module. --- ena_objects/ena_experiment.py | 24 ++---------------- ena_objects/ena_run.py | 3 +-- ena_objects/ena_std_lib.py | 46 ++++++++++++++++++++++++++++++++--- 3 files changed, 46 insertions(+), 27 deletions(-) diff --git a/ena_objects/ena_experiment.py b/ena_objects/ena_experiment.py index c206038..15f5eaa 100644 --- a/ena_objects/ena_experiment.py +++ b/ena_objects/ena_experiment.py @@ -1,9 +1,8 @@ -import re -from typing import List, Dict, Union +from typing import List, Dict from pandas import DataFrame -from ena_objects.ena_std_lib import get_assay_sample_associations +from ena_objects.ena_std_lib import get_assay_sample_associations, clip_off_prefix from ena_objects.characteristic import ( IsaBase, OtherMaterialCharacteristic, @@ -13,25 +12,6 @@ from ena_objects.other_material import OtherMaterial -def clip_off_prefix(alias: Union[str, List[str]]) -> Union[str, List[str]]: - if isinstance(alias, str): - result = re.split("/", alias)[-1] - elif isinstance(alias, list): - result = [] - for item in alias: - if isinstance(item, str): - result.append(re.split("/", item)[-1]) - else: - raise TypeError( - "The 'clip_off_prefix' function only accepts strings or a list of strings" - ) - else: - raise TypeError( - "The 'clip_off_prefix' function only accepts 
strings or a list of strings" - ) - return result - - def experiment_alias(other_material: OtherMaterial): seek_assays_id: str = clip_off_prefix(other_material.id) return EnaExperiment.prefix + seek_assays_id diff --git a/ena_objects/ena_run.py b/ena_objects/ena_run.py index 120549a..76e969b 100644 --- a/ena_objects/ena_run.py +++ b/ena_objects/ena_run.py @@ -3,8 +3,7 @@ from pandas import DataFrame from ena_objects.characteristic import IsaBase -from ena_objects.ena_experiment import clip_off_prefix -from ena_objects.ena_std_lib import get_assay_sample_associations +from ena_objects.ena_std_lib import get_assay_sample_associations, clip_off_prefix class DataFileComment(IsaBase): diff --git a/ena_objects/ena_std_lib.py b/ena_objects/ena_std_lib.py index 8b30d97..3151183 100644 --- a/ena_objects/ena_std_lib.py +++ b/ena_objects/ena_std_lib.py @@ -1,5 +1,5 @@ -from ast import List, ClassDef -from typing import Dict +from typing import Dict, List, Union +import re def filter_attribute_by(attribute_list: str, key: str, value: str) -> Dict: @@ -40,7 +40,16 @@ def validate_dict(dict: Dict, key: str) -> None: raise KeyError(f"{key} was not found in the provided ISA JSON.") -def get_assay_sample_associations(assay_dict: Dict): +def get_assay_sample_associations(assay_dict: Dict[str, str]) -> List[Dict[str, str]]: + """Fetches the list of sample assocations in a specified assay dictionary. + Each dictionary contains a list of input ids and output ids. 
+ + Args: + assay_dict (Dict[str, str]): input assay dictionary + + Returns: + List[Dict[str, str]]: List of dictionaries with the associations + """ process_sequence = [] for process in assay_dict["processSequence"]: input_ids = [input["@id"] for input in process["inputs"]] @@ -48,3 +57,34 @@ def get_assay_sample_associations(assay_dict: Dict): process_sequence.append({"input": input_ids, "output": output_ids}) return process_sequence + + +def clip_off_prefix(alias: Union[str, List[str]]) -> Union[str, List[str]]: + """Clips off any prefix separated by the '/' character and returns the last subelement. + The input can be a single String or a list of Strings. + + Args: + alias (Union[str, List[str]]): Single alias or List of aliases + + Raises: + TypeError: If the type of the input is anything other than a String or a list of Strings, an Exception is raised. + + Returns: + Union[str, List[str]]: Depending on the input, returns a single String or a list of Strings. + """ + if isinstance(alias, str): + result = re.split("/", alias)[-1] + elif isinstance(alias, list): + result = [] + for item in alias: + if isinstance(item, str): + result.append(re.split("/", item)[-1]) + else: + raise TypeError( + "The 'clip_off_prefix' function only accepts strings or a list of strings" + ) + else: + raise TypeError( + "The 'clip_off_prefix' function only accepts strings or a list of strings" + ) + return result From 9e009bdb93878db9a954763c7621cde12c1f9648 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 29 Sep 2023 09:34:39 +0200 Subject: [PATCH 31/62] Annotation of the classes and modules --- ena_objects/characteristic.py | 33 +++++++++-- ena_objects/ena_experiment.py | 108 ++++++++++++++++++++++++++++++---- ena_objects/ena_run.py | 97 +++++++++++++++++++++++++++--- ena_objects/ena_sample.py | 7 --- ena_objects/ena_submission.py | 29 ++++++++- ena_objects/other_material.py | 16 ++++- 6 files changed, 253 insertions(+), 37 deletions(-) diff --git 
a/ena_objects/characteristic.py b/ena_objects/characteristic.py index 5671da4..29dcc20 100644 --- a/ena_objects/characteristic.py +++ b/ena_objects/characteristic.py @@ -24,7 +24,16 @@ def fetch_category_name(categories: Dict[str, str], name: str) -> str: return cat["value"] -def category_dict(dict: Dict[str, str], categories: Dict[str, str]): +def category_dict(dict: Dict[str, str], categories: Dict[str, str]) -> Dict[str, str]: + """Matches the category ID to a category name and returns a dictionary of the category. + + Args: + dict (Dict[str, str]): category dictionary + categories (Dict[str, str]): Dictionary of the characteristics to match + + Returns: + Dict[str, str]: Modified category dictionary + """ category_name = fetch_category_name(categories, dict) category_id = dict["@id"] return {"id": category_id, "name": category_name} @@ -32,7 +41,7 @@ def category_dict(dict: Dict[str, str], categories: Dict[str, str]): class Characteristic(IsaBase): """ - This is the base class of a characteristics object. + This is the generic base class of a characteristics object. """ mandatory_keys = ["category", "value", "unit"] @@ -43,7 +52,16 @@ def __init__(self, category: Dict, value: str) -> None: self.value = value @classmethod - def from_dict(self, dict: Dict[str, str], categories: Dict[str, str]): + def from_dict(self, dict: Dict[str, str], categories: List[Dict[str, str]]) -> None: + """Creates a characteristic object from a dictionary + + Args: + dict (Dict[str, str]): Characteristics dictionary + categories (List[Dict[str, str]]): List of all characteristics categories + + Returns: + Characteristic: _description_ + """ super().check_dict_keys(dict, self.mandatory_keys) return self( category=category_dict(dict["category"], categories), @@ -59,7 +77,7 @@ def to_dict(self) -> Dict[str, str]: class OtherMaterialCharacteristic(Characteristic): """ - This class represents the other material object. + This class represents a Characteristic for the other material object. 
""" def __init__(self, category: Dict, value: str) -> None: @@ -78,7 +96,7 @@ def to_dict(self) -> Dict[str, str]: class ParameterValue(Characteristic): """ This class represents a paramenter value in the isa study - and is inherited from the Characteristic class + and extends the Characteristic class. """ def __init__(self, category: Dict[str, str], value: str) -> None: @@ -93,6 +111,11 @@ def to_dict(self) -> Dict[str, str]: class SampleCharacteristic(Characteristic): + """ + This class represents a Sample Characteristic in the isa study + and extends the Characteristic class. + """ + def __init__(self, category: Dict, value: str) -> None: super().__init__(category, value) diff --git a/ena_objects/ena_experiment.py b/ena_objects/ena_experiment.py index 15f5eaa..23c279b 100644 --- a/ena_objects/ena_experiment.py +++ b/ena_objects/ena_experiment.py @@ -1,4 +1,4 @@ -from typing import List, Dict +from typing import List, Dict, Union from pandas import DataFrame @@ -12,12 +12,30 @@ from ena_objects.other_material import OtherMaterial -def experiment_alias(other_material: OtherMaterial): +def experiment_alias(other_material: OtherMaterial) -> str: + """Generates an alias for the experiment, starting from an other_material + and the prefix specified in the class. + + Args: + other_material (OtherMaterial): _description_ + + Returns: + str: _description_ + """ seek_assays_id: str = clip_off_prefix(other_material.id) return EnaExperiment.prefix + seek_assays_id -def fetch_characteristic_categories(study_dict: Dict): +def fetch_characteristic_categories(study_dict: Dict[str, str]) -> List[Dict[str, str]]: + """Fetches all characteristics categories from a provided study dictionary + and returns them as a list of characteristics categories. 
+ + Args: + study_dict (Dict[str, str]): Input study dictionary + + Returns: + List[Dict[str, str]]: List of the characteristics categories + """ categories = [] for assay in study_dict["assays"]: for cc in assay["characteristicCategories"]: @@ -27,10 +45,18 @@ def fetch_characteristic_categories(study_dict: Dict): return categories -def get_other_materials(study_dict: Dict) -> List[OtherMaterial]: +def get_other_materials(study_dict: Dict[str, str]) -> List[OtherMaterial]: + """Returns a List of 'other materials' from a study dictionary + and returns them as a list of OtherMaterial objects. + + Args: + study_dict (Dict[str, str]): Input study dictionary + + Returns: + List[OtherMaterial]: Resulting list of OtherMaterial objects + """ other_materials = [] characteristics_categories = fetch_characteristic_categories(study_dict) - # parameters = fetch_parameters() for assay in study_dict["assays"]: for om in assay["materials"]["otherMaterials"]: other_material = OtherMaterial.from_dict( @@ -41,13 +67,33 @@ def get_other_materials(study_dict: Dict) -> List[OtherMaterial]: return other_materials -def library_names(study_dict: Dict) -> List[str]: +def library_names(study_dict: Dict[str, str]) -> List[str]: + """Returns a list of library names from a study dictionary. + + Args: + study_dict (Dict[str, str]): Input study dictionary + + Returns: + List[str]: Resulting list of library names + """ return [om["name"] for om in get_other_materials(study_dict)] def get_derived_sample_alias( - other_material: OtherMaterial, study_dict: Dict, return_multiple: bool = False -) -> str: + other_material: OtherMaterial, + study_dict: Dict[str, str], + return_multiple: bool = False, +) -> Union[str, List[str]]: + """Gets Sample ids, an 'other material' is derived from. + + Args: + other_material (OtherMaterial): other material + study_dict (Dict): Input study dictioary + return_multiple (bool, optional): Optional flag to return multiple sample ID's per 'other material'. 
Defaults to False. + + Returns: + str: Resulting derived sample id or list of derived sample id's + """ assoc_sample_ids = [] for assay in study_dict["assays"]: sample_associations = get_assay_sample_associations(assay) @@ -65,7 +111,15 @@ def get_derived_sample_alias( return assoc_sample_ids -def fetch_parameters(protocol_dict: Dict): +def fetch_parameters(protocol_dict: Dict[str, str]) -> List[Dict[str, str]]: + """Fetches the parameters from a protocol dictionary. + + Args: + protocol_dict (Dict[str, str]): protocol dictionary + + Returns: + List[Dict[str, str]]: Resulting list of parameters + """ parameters = [] for protocol in protocol_dict: for parameter in protocol["parameters"]: @@ -78,7 +132,15 @@ def fetch_parameters(protocol_dict: Dict): return parameters -def get_parameter_values(study_dict: Dict) -> Dict: +def get_parameter_values(study_dict: Dict[str, str]) -> Dict[str, str]: + """Returns all parameter values from a study dictionary. + + Args: + study_dict (Dict[str, str]): Input study dictionary + + Returns: + Dict[str, str]: Resulting dictionary of parameter values. + """ param_vals = [] parameters = fetch_parameters(study_dict["protocols"]) for assay in study_dict["assays"]: @@ -123,7 +185,12 @@ def __init__( self.parameter_values = parameter_values self.other_material_characteristics = other_material_characteristics - def to_dict(self) -> Dict: + def to_dict(self) -> Dict[str, str]: + """Returns the EnaExperiment object as a dictionary. + + Returns: + Dict[str, str]: Resulting dictionary with EnaExperiment information + """ return { "alias": self.alias, "study_alias": self.sample_alias, @@ -136,7 +203,16 @@ def to_dict(self) -> Dict: } @classmethod - def from_study_dict(self, study_dict: Dict, study_alias: str): + def from_study_dict(self, study_dict: Dict[str, str], study_alias: str) -> None: + """Generates a EnaExperiment object from a study dictionary. 
+ + Args: + study_dict (Dict[str, str]): Input study dictionary + study_alias (str): Alias of the study associated with the experiments + + Returns: + EnaExperiment: Resulting EnaExperiment object + """ super().check_dict_keys(study_dict, self.mandatory_keys) other_materials = get_other_materials(study_dict) @@ -169,6 +245,14 @@ def from_study_dict(self, study_dict: Dict, study_alias: str): def export_experiments_to_dataframe(experiments: List[EnaExperiment]) -> DataFrame: + """Exports the information out of a list of EnaExperiment to a pandas DataFrame + + Args: + experiments (List[EnaExperiment]): Input list of experiments + + Returns: + DataFrame: Resulting DataFrame + """ flat_dicts = [] for experiment in experiments: experiment_dict = experiment.to_dict() diff --git a/ena_objects/ena_run.py b/ena_objects/ena_run.py index 76e969b..9f77699 100644 --- a/ena_objects/ena_run.py +++ b/ena_objects/ena_run.py @@ -7,6 +7,10 @@ class DataFileComment(IsaBase): + """Object representation of a data file comment in the ISA JSON. + Extends the IsaBase class. + """ + mandatory_keys = ["name", "value"] def __init__(self, name: str, value: str) -> None: @@ -15,7 +19,15 @@ def __init__(self, name: str, value: str) -> None: self.value = value @classmethod - def from_dict(self, comments_dict) -> None: + def from_dict(self, comments_dict: Dict[str, str]) -> None: + """Generates a DataFileComment from comment dictionary. + + Args: + comments_dict (Dict[str, str]): Input data file comment dictionary + + Returns: + DataFileComment: Resulting DataFileComment + """ for comment in comments_dict: super().check_dict_keys(dict=comment, mandatory_keys=self.mandatory_keys) @@ -29,6 +41,10 @@ def to_dict(self) -> Dict: class DataFile(IsaBase): + """Object representation of a data file in the ISA JSON. + Extends the IsaBase class. 
+ """ + mandatory_keys = ["@id", "name", "type", "comments"] def __init__(self, id, name, type, comments, derived_experiment_id) -> None: @@ -40,7 +56,18 @@ def __init__(self, id, name, type, comments, derived_experiment_id) -> None: self.derived_experiment_id: str = derived_experiment_id @classmethod - def from_data_file_dict(self, data_file_dict: Dict, associations: Dict) -> None: + def from_data_file_dict( + self, data_file_dict: Dict[str, str], associations: Dict[str, str] + ) -> None: + """Generates a DataFile from a data file dictionary and dictionary of data file associations. + + Args: + data_file_dict (Dict[str, str]): data file dictionary + associations (Dict[str, str]): data file associations dictionar + + Returns: + DataFile: Resulting DataFile + """ super().check_dict_keys(data_file_dict, self.mandatory_keys) return DataFile( id=data_file_dict["@id"], @@ -52,7 +79,12 @@ def from_data_file_dict(self, data_file_dict: Dict, associations: Dict) -> None: ), ) - def to_dict(self) -> Dict: + def to_dict(self) -> Dict[str, str]: + """Converts the DataFile object into a dictionary. + + Returns: + Dict[str, str]: Resulting dictionary + """ return { "id": self.id, "name": self.name, @@ -62,23 +94,51 @@ def to_dict(self) -> Dict: } -def fetch_run_alias(data_file: Dict): +def fetch_run_alias(data_file: Dict[str, str]) -> str: + """Generates an alias for the run, based on the data file dictionary + and prefix specified in the Class + + Args: + data_file (Dict[str, str]): Input data file dictionary + + Returns: + str: Resulting alias + """ return EnaRun.prefix + clip_off_prefix(data_file["@id"]) -def get_derived_expertiment_id(associations: List[Dict], data_file_id: str): +def get_derived_expertiment_id( + associations: List[Dict[str, str]], data_file_id: str +) -> str: + """Fetches the derived sample id from data file id and a list of datafile - experiment associations. 
+ + Args: + associations (List[Dict[str, str]]): list of sample - experiment associations + data_file_id (str): data file id + + Returns: + str: resulting derived experiment id + """ for association in associations: if data_file_id in clip_off_prefix(association["output"]): return association["input"][0] def fetch_experiment_alias(data_file: DataFile) -> str: + """Generates the experiment alias from the information in the provided data file. + + Args: + data_file (DataFile): Input data file + + Returns: + str: associated experiment alias + """ return EnaRun.prefix + clip_off_prefix(data_file.derived_experiment_id) class EnaRun(IsaBase): """ - Generates a Run object, compliant to the requirements of ENA + Generates a Run object, compliant to the requirements of ENA. """ mandatory_keys = ["dataFiles", "processSequence"] @@ -96,7 +156,15 @@ def __init__( self.data_file = data_file @classmethod - def from_study_dict(self, study_dict: Dict) -> None: + def from_study_dict(self, study_dict: Dict[str, str]) -> None: + """Generates a EnaRun object from a study dictionary. 
+ + Args: + study_dict (Dict[str, str]): Input study dictionary + + Returns: + EnaRun: Resulting run + """ ena_runs = [] for assay in study_dict["assays"]: @@ -116,7 +184,12 @@ def from_study_dict(self, study_dict: Dict) -> None: return ena_runs - def to_dict(self) -> Dict: + def to_dict(self) -> Dict[str, str]: + """Converts the EnaRun object into a dictionary + + Returns: + Dict[str, str]: resulting dictionary + """ return { "alias": self.alias, "experiment_alias": self.experiment_alias, @@ -125,6 +198,14 @@ def to_dict(self) -> Dict: def export_runs_to_dataframe(runs: List[EnaRun]) -> DataFrame: + """Exports a list of EnaRun to a pandas DataFrame + + Args: + runs (List[EnaRun]): input list of EnaRun + + Returns: + DataFrame: Resulting pandas DataFrame + """ ena_run_dicts = [run.to_dict() for run in runs] flat_dicts = [] for dict in ena_run_dicts: diff --git a/ena_objects/ena_sample.py b/ena_objects/ena_sample.py index a9acfda..c33df99 100644 --- a/ena_objects/ena_sample.py +++ b/ena_objects/ena_sample.py @@ -34,13 +34,6 @@ def fetch_characteristics(sample_dict: Dict, study_dict: Dict) -> List[Dict]: characteristic_categories = fetch_characteristic_categories(study_dict) return [ SampleCharacteristic.from_dict(char, characteristic_categories) - # { - # "category_id": char["category"]["@id"], - # "category_name": study_characteristic_category_name( - # study_dict, char["category"]["@id"] - # ), - # "value": char["value"]["annotationValue"], - # } for char in sample_dict["characteristics"] ] diff --git a/ena_objects/ena_submission.py b/ena_objects/ena_submission.py index c792ca1..2afc634 100644 --- a/ena_objects/ena_submission.py +++ b/ena_objects/ena_submission.py @@ -11,14 +11,23 @@ def merge_df_by_key( dataframe_dict_list: List[Dict[str, DataFrame]], key: str -) -> Dict[str, DataFrame]: +) -> DataFrame: + """Filters a list of pandas DataFrames on the provided key and merges them by row. 
+ + Args: + dataframe_dict_list (List[Dict[str, DataFrame]]): list of dictionary, containing the DataFrames + key (str): key to filter the list on + + Returns: + DataFrame: resulting DataFrame + """ filtered_list = list(map(lambda d: d[key], dataframe_dict_list)) return pandas.concat(filtered_list) class EnaSubmission: """ - Generates a Submission object, compliant to the requirements of ENA + Wrapper objects, holding a Study """ def __init__( @@ -27,12 +36,26 @@ def __init__( ) -> None: self.studies = studies - def from_isa_json(isa_json: Dict) -> None: + def from_isa_json(isa_json: Dict[str, str]) -> None: + """Generates an EnaSubmission from a ISA JSON dictionary. + + Args: + isa_json (Dict[str, str]): ISA JSON dictionary + + Returns: + EnaSubmission: resulting EnaSubmission + """ return EnaSubmission( studies=EnaStudy.from_isa_json(isa_json), ) def generate_dataframes(self) -> Dict[str, DataFrame]: + """Generates all necessary DataFrames for the ENA Upload tool + and returns them in a dictionary. + + Returns: + Dict[str, DataFrame]: resulting dictionary of DataFrames + """ dataframes = [] for study in self.studies: study_df = EnaStudy.to_dataframe(study) diff --git a/ena_objects/other_material.py b/ena_objects/other_material.py index e295271..c43acef 100644 --- a/ena_objects/other_material.py +++ b/ena_objects/other_material.py @@ -10,7 +10,7 @@ class OtherMaterial(IsaBase): """ - docstring + This Class represents an 'other_material' in the ISA JSON and extends the ISA Base class. """ mandatory_keys = ["id", "name", "type", "other_material_characteristics"] @@ -28,7 +28,19 @@ def __init__( self.other_material_characteristics = other_material_characteristics @classmethod - def from_dict(cls, dict, characteristics_categories): + def from_dict( + cls, dict: Dict[str, str], characteristics_categories: List[Dict[str, str]] + ) -> None: + """Constructs an OtherMaterial, starting from a other_material dictionary + and a list of all other_material_characteristics. 
+ + Args: + dict (Dict[str, str]): other_material dictionary + characteristics_categories (List[Dict[str, str]]): other_material_characteristics dictionary + + Returns: + OtherMaterial: other material object + """ return OtherMaterial( id=dict["@id"], name=dict["name"], From 27abbde8d4f93480bf20044dbce5e0e3f0c61e70 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 29 Sep 2023 16:11:42 +0200 Subject: [PATCH 32/62] Remove unused imports --- ena_objects/characteristic.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/ena_objects/characteristic.py b/ena_objects/characteristic.py index 29dcc20..9608e56 100644 --- a/ena_objects/characteristic.py +++ b/ena_objects/characteristic.py @@ -1,7 +1,5 @@ from typing import List, Dict -from decopatch import class_decorator -from exceptiongroup import catch from ena_objects.ena_std_lib import validate_dict From 1839994924acfbbe02f1177abe92952e620396ae Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Tue, 3 Oct 2023 09:41:11 +0200 Subject: [PATCH 33/62] Fix typo --- ena_objects/ena_experiment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ena_objects/ena_experiment.py b/ena_objects/ena_experiment.py index 23c279b..c3deb7e 100644 --- a/ena_objects/ena_experiment.py +++ b/ena_objects/ena_experiment.py @@ -193,7 +193,7 @@ def to_dict(self) -> Dict[str, str]: """ return { "alias": self.alias, - "study_alias": self.sample_alias, + "study_alias": self.study_alias, "sample_alias": self.sample_alias, "library_name": self.library_name, "parameter_values": [pv.to_dict() for pv in self.parameter_values], From 40ccb57714a1c064167e931fef1831e1dc96a761 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Wed, 11 Oct 2023 16:17:07 +0200 Subject: [PATCH 34/62] Implementation of assay streams for ena runs --- ena_objects/ena_run.py | 27 ++++++----- ena_objects/ena_std_lib.py | 86 +++++++++++++++++++++++++---------- ena_objects/ena_study.py | 86 +++++++++++++++++++++++------------ 
ena_objects/ena_submission.py | 53 ++++++++++++++++++--- example_read_isa_json.py | 11 ++++- 5 files changed, 187 insertions(+), 76 deletions(-) diff --git a/ena_objects/ena_run.py b/ena_objects/ena_run.py index 9f77699..9ade897 100644 --- a/ena_objects/ena_run.py +++ b/ena_objects/ena_run.py @@ -156,7 +156,7 @@ def __init__( self.data_file = data_file @classmethod - def from_study_dict(self, study_dict: Dict[str, str]) -> None: + def from_assay_stream(self, assay_stream: Dict[str, str]) -> None: """Generates a EnaRun object from a study dictionary. Args: @@ -167,20 +167,19 @@ def from_study_dict(self, study_dict: Dict[str, str]) -> None: """ ena_runs = [] - for assay in study_dict["assays"]: - super().check_dict_keys(assay, self.mandatory_keys) - sample_datafile_associations = get_assay_sample_associations(assay) - for data_file in assay["dataFiles"]: - current_data_file = DataFile.from_data_file_dict( - data_file, sample_datafile_associations - ) - ena_runs.append( - EnaRun( - alias=fetch_run_alias(data_file), - experiment_alias=fetch_experiment_alias(current_data_file), - data_file=current_data_file, - ) + super().check_dict_keys(assay_stream, self.mandatory_keys) + sample_datafile_associations = get_assay_sample_associations(assay_stream) + for data_file in assay_stream["dataFiles"]: + current_data_file = DataFile.from_data_file_dict( + data_file, sample_datafile_associations + ) + ena_runs.append( + EnaRun( + alias=fetch_run_alias(data_file), + experiment_alias=fetch_experiment_alias(current_data_file), + data_file=current_data_file, ) + ) return ena_runs diff --git a/ena_objects/ena_std_lib.py b/ena_objects/ena_std_lib.py index 3151183..ee86521 100644 --- a/ena_objects/ena_std_lib.py +++ b/ena_objects/ena_std_lib.py @@ -2,30 +2,6 @@ import re -def filter_attribute_by(attribute_list: str, key: str, value: str) -> Dict: - """Filters out the the attributes by key-value matching in the ISA JSON - - Args: - element (str): _description_ - key (str): _description_ - 
value (str): _description_ - - Example: - my_element = {"comments": [ - { "name": "SEEK Study ID", "value": "2" }, - { "name": "SEEK creation date", "value": "2023-09-22T06:14:34Z" } - ] - } - filter_attribute_by(element = my_element, key = 'name', value= 'SEEK Study ID') - - Output: { "name": "SEEK Study ID", "value": "2" } - - Returns: - Dict: The Dict that matches the criteria - """ - return [attribute for attribute in attribute_list if attribute[key] == value] - - def validate_dict(dict: Dict, key: str) -> None: """Raises an error if the structure of the ISA JSON Dict is not conform @@ -88,3 +64,65 @@ def clip_off_prefix(alias: Union[str, List[str]]) -> Union[str, List[str]]: "The 'clip_off_prefix' function only accepts strings or a list of strings" ) return result + + +def get_study_id(study_dict: Dict[str, str]) -> str: + """Fetches the study ID from the comments of a provided study dictionary + + Args: + study_dict (Dict[str, str]): study_dictionary + + Raises: + KeyError: Raised when the 'SEEK Study ID' comment is not found + + Returns: + str: Resulting identifier + """ + comment_names = [comment["name"] for comment in study_dict["comments"]] + for study_comment in study_dict["comments"]: + if "SEEK Study ID" not in comment_names: + raise KeyError( + "Bad dictionary. 'SEEK Study ID' comment is mandatory in Study." 
+ ) + if study_comment["name"] == "SEEK Study ID": + return study_comment["value"] + + +# def fetch_requested_studies( +# studies_isa_json: Dict[str, str], dataset: Dict[str, str] +# ) -> List[Dict[str, str]]: +# """Fetches the requested studies by cross-matching the studies in the dataset + +# Args: +# studies_isa_json (Dict[str, str]): studies dictionary +# dataset (Dict[str, str]): dataset dictionary + +# Returns: +# List[Dict[str, str]]: Resulting list of study dictionaries +# """ +# requested_study_ids = [study["id"] for study in dataset["studies"]] + +# studies = [] +# for study in studies_isa_json: +# if get_study_id(study) in requested_study_ids: +# studies.append(study) + +# return studies + + +def fetch_assay_comment_by_name( + assay_stream: Dict[str, str], comment_name: str +) -> Dict[str, str]: + for comment in assay_stream["comments"]: + if comment["name"] == comment_name: + return comment + + +def fetch_assay_streams(study: Dict[str, str]) -> List[Dict[str, str]]: + return [assay for assay in study["assays"]] + + +# def fetch_requested_assay_ids(dataset: Dict[str, str]) -> List[str]: +# assay_ids = [] +# for study in dataset["studies"]: +# assay_ids.append([assay["id"] for assay in study["assays"]]) diff --git a/ena_objects/ena_study.py b/ena_objects/ena_study.py index a626982..72566ce 100644 --- a/ena_objects/ena_study.py +++ b/ena_objects/ena_study.py @@ -1,10 +1,15 @@ +from operator import index from typing import List, Optional, Dict from pandas import DataFrame from ena_objects.characteristic import IsaBase from ena_objects.ena_experiment import EnaExperiment from ena_objects.ena_run import EnaRun from ena_objects.ena_sample import EnaSample -from ena_objects.ena_std_lib import filter_attribute_by, validate_dict +from ena_objects.ena_std_lib import ( + fetch_assay_streams, + fetch_assay_comment_by_name, + get_study_id, +) def study_publication_ids(publication_isa_json: Dict) -> List[int]: @@ -16,29 +21,47 @@ def 
study_publication_ids(publication_isa_json: Dict) -> List[int]: Returns: List[int]: List of pubmed ID's """ - return [pub["id"] for pub in publication_isa_json] + return ",".join([str(pub["pubMedID"]) for pub in publication_isa_json]) -def study_alias(study_isa_json: str) -> str: - """Creates a study_alias, based on information of the study part of the ISA JSON. +def study_alias(assay_stream: Dict[str, str], seek_study_id) -> str: + """Creates a study_alias, based on information of the assay stream and study of the ISA JSON. Args: - study_isa_json (str): Study part of the ISA JSON + assay_stream Dict[str, str]: assay stream part of the ISA JSON + seek_study_id str: Study ID Returns: str: the study_alias """ - seek_study_id: str = filter_attribute_by( - study_isa_json["comments"], key="name", value="SEEK Study ID" - )[0]["value"] - return EnaStudy.prefix + seek_study_id + prefix = fetch_assay_comment_by_name(assay_stream, EnaStudy.prefix)["value"] + return prefix + seek_study_id + + +def study_title(assay_stream: Dict[str, str]) -> str: + return fetch_assay_comment_by_name(assay_stream, "ena_study_title")["value"] + + +def study_type(assay_stream: Dict[str, str]) -> str: + return fetch_assay_comment_by_name(assay_stream, "study_type")["value"] + + +def new_study_type(assay_stream: Dict[str, str]) -> str: + if study_type(assay_stream).lower() != "other": + return None + + return fetch_assay_comment_by_name(assay_stream, "new_study_type")["value"] + + +def study_abstract(assay_stream: Dict[str, str]) -> str: + return fetch_assay_comment_by_name(assay_stream, "ena_study_abstract")["value"] class EnaStudy(IsaBase): """Generates a Study object, compliant to the requirements of ENA""" mandatory_keys = ["title", "description", "publications"] - prefix = "https://datahub.elixir-belgium.org/studies/" # TODO: Replace by something less hard-coded + prefix = "ena_study_alias_prefix" def __init__( self, @@ -50,7 +73,7 @@ def __init__( experiments: List[EnaExperiment] = [], runs: 
List[EnaRun] = [], new_study_type: Optional[str] = None, - pubmed_id: Optional[List[int]] = None, + pubmed_id: Optional[str] = None, ) -> None: self.alias = alias self.title = title @@ -75,7 +98,7 @@ def to_dict(self): } @classmethod - def from_isa_json(self, isa_json: Dict): + def from_isa_json(self, isa_json: Dict[str, str]): """Method that creates an EnaStudy with params from ISA JSON Dictionary Args: @@ -87,25 +110,28 @@ def from_isa_json(self, isa_json: Dict): super().check_dict_keys(isa_json, self.mandatory_keys) ena_studies = [] - for study in isa_json["studies"]: - ena_studies.append( - EnaStudy( - alias=study_alias(study), - title=study["title"], - study_type="", # TODO: Replace by Custom metadata of the Assay level - study_abstract=study["description"], - new_study_type=None, - samples=EnaSample.from_study_dict(study), - experiments=EnaExperiment.from_study_dict( - study, study_alias(study) - ), - runs=EnaRun.from_study_dict(study), - pubmed_id=study_publication_ids( - publication_isa_json=study["publications"] - ), + assay_streams = fetch_assay_streams(study) + study_id = get_study_id(study) + for assay_stream in assay_streams: + current_study_alias = study_alias(assay_stream, study_id) + ena_studies.append( + EnaStudy( + alias=current_study_alias, + title=study_title(assay_stream), + study_type=study_type(assay_stream), + study_abstract=study_abstract(assay_stream), + new_study_type=new_study_type(assay_stream), + samples=EnaSample.from_study_dict(study), + experiments=EnaExperiment.from_study_dict( + study, current_study_alias + ), + runs=EnaRun.from_study_dict(assay_stream), + pubmed_id=study_publication_ids( + publication_isa_json=study["publications"] + ), + ) ) - ) return ena_studies @@ -115,4 +141,4 @@ def to_dataframe(self) -> DataFrame: Returns: DataFrame: Pandas DataFrame representation of the Study """ - return DataFrame.from_dict(self.to_dict()) + return DataFrame.from_dict([self.to_dict()]) diff --git a/ena_objects/ena_submission.py 
b/ena_objects/ena_submission.py index 2afc634..b9db393 100644 --- a/ena_objects/ena_submission.py +++ b/ena_objects/ena_submission.py @@ -1,7 +1,9 @@ from typing import List, Dict +from numpy import append from pandas import DataFrame import pandas +from ena_objects.characteristic import IsaBase from ena_objects.ena_experiment import export_experiments_to_dataframe from ena_objects.ena_run import export_runs_to_dataframe from ena_objects.ena_sample import export_samples_to_dataframe @@ -25,18 +27,57 @@ def merge_df_by_key( return pandas.concat(filtered_list) -class EnaSubmission: +def fetch_ena_studies(isa_json: Dict[str, str]) -> Dict[str, str]: + ena_studies = [] + for study in isa_json["studies"]: + for assay in study["assays"]: + ena_studies.append(assay) + return ena_studies + + +def fetch_assay(assay, required_assays): + for ra in required_assays: + for key, value in ra.items(): + for assay_comment in assay["comments"]: + if assay_comment["name"] == key and assay_comment["value"] == value: + return assay + + +def filter_assays( + isa_json: Dict[str, str], required_assays: List[Dict[str, str]] +) -> Dict[str, str]: + new_studies = [] + new_isa_json = isa_json + studies = new_isa_json.pop("studies") + for study in studies: + assays = study.pop("assays") + filtered_assays = [ + fetch_assay(assay, required_assays) + for assay in assays + if fetch_assay(assay, required_assays) is not None + ] + if len(filtered_assays) > 0: + study["assays"] = filtered_assays + new_studies.append(study) + new_isa_json["studies"] = new_studies + return new_isa_json + + +class EnaSubmission(IsaBase): """ - Wrapper objects, holding a Study + Wrapper objects, holding studies """ def __init__( self, studies: List[EnaStudy] = [], ) -> None: + super().__init__() self.studies = studies - def from_isa_json(isa_json: Dict[str, str]) -> None: + def from_isa_json( + isa_json: Dict[str, str], required_assays: List[Dict[str, str]] + ) -> None: """Generates an EnaSubmission from a ISA JSON 
dictionary. Args: @@ -45,9 +86,9 @@ def from_isa_json(isa_json: Dict[str, str]) -> None: Returns: EnaSubmission: resulting EnaSubmission """ - return EnaSubmission( - studies=EnaStudy.from_isa_json(isa_json), - ) + filtered_isa_json: Dict[str, str] = filter_assays(isa_json, required_assays) + + return EnaSubmission(studies=EnaStudy.from_isa_json(filtered_isa_json)) def generate_dataframes(self) -> Dict[str, DataFrame]: """Generates all necessary DataFrames for the ENA Upload tool diff --git a/example_read_isa_json.py b/example_read_isa_json.py index fc30d1b..8d381fa 100644 --- a/example_read_isa_json.py +++ b/example_read_isa_json.py @@ -3,14 +3,21 @@ from ena_objects.ena_submission import EnaSubmission # Read json file -isa_json_file = open("tests/test_data/isa_json_test_investigation.json") +isa_json_file = open( + "tests/test_data/multi_study_multi_assay_stream_investigation.json" +) isa_json = json.load(isa_json_file) # Change this to 'True' if you want to export the resulting DataFrames to an xlsx. 
export_to_excel = False outputfolder = "./output_folder/" -submission = EnaSubmission.from_isa_json(isa_json) +required_assays = [ + {"assay_stream": "Assay Stream 1"}, + {"ena_study_title": "ENA Study 2"}, +] + +submission = EnaSubmission.from_isa_json(isa_json, required_assays) submission_dfs = submission.generate_dataframes() if (not os.path.exists(outputfolder)) and export_to_excel: From 57b04a3f842b5c4a5062046880bc3c08388b1a57 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Wed, 11 Oct 2023 17:29:55 +0200 Subject: [PATCH 35/62] Rearranged EnaSubmission --- ena_objects/ena_experiment.py | 101 ++++++++++++++++++---------------- ena_objects/ena_std_lib.py | 10 ++++ ena_objects/ena_study.py | 95 ++++++++++---------------------- ena_objects/ena_submission.py | 65 ++++++++++++++-------- 4 files changed, 135 insertions(+), 136 deletions(-) diff --git a/ena_objects/ena_experiment.py b/ena_objects/ena_experiment.py index c3deb7e..01e0746 100644 --- a/ena_objects/ena_experiment.py +++ b/ena_objects/ena_experiment.py @@ -26,7 +26,9 @@ def experiment_alias(other_material: OtherMaterial) -> str: return EnaExperiment.prefix + seek_assays_id -def fetch_characteristic_categories(study_dict: Dict[str, str]) -> List[Dict[str, str]]: +def fetch_characteristic_categories( + assay_stream: Dict[str, str] +) -> List[Dict[str, str]]: """Fetches all characteristics categories from a provided study dictionary and returns them as a list of characteristics categories. 
@@ -37,15 +39,14 @@ def fetch_characteristic_categories(study_dict: Dict[str, str]) -> List[Dict[str List[Dict[str, str]]: List of the characteristics categories """ categories = [] - for assay in study_dict["assays"]: - for cc in assay["characteristicCategories"]: - categories.append( - {"id": cc["@id"], "value": cc["characteristicType"]["annotationValue"]} - ) + for cc in assay_stream["characteristicCategories"]: + categories.append( + {"id": cc["@id"], "value": cc["characteristicType"]["annotationValue"]} + ) return categories -def get_other_materials(study_dict: Dict[str, str]) -> List[OtherMaterial]: +def get_other_materials(assay_stream: Dict[str, str]) -> List[OtherMaterial]: """Returns a List of 'other materials' from a study dictionary and returns them as a list of OtherMaterial objects. @@ -56,13 +57,12 @@ def get_other_materials(study_dict: Dict[str, str]) -> List[OtherMaterial]: List[OtherMaterial]: Resulting list of OtherMaterial objects """ other_materials = [] - characteristics_categories = fetch_characteristic_categories(study_dict) - for assay in study_dict["assays"]: - for om in assay["materials"]["otherMaterials"]: - other_material = OtherMaterial.from_dict( - dict=om, characteristics_categories=characteristics_categories - ) - other_materials.append(other_material) + characteristics_categories = fetch_characteristic_categories(assay_stream) + for om in assay_stream["materials"]["otherMaterials"]: + other_material = OtherMaterial.from_dict( + dict=om, characteristics_categories=characteristics_categories + ) + other_materials.append(other_material) return other_materials @@ -81,7 +81,7 @@ def library_names(study_dict: Dict[str, str]) -> List[str]: def get_derived_sample_alias( other_material: OtherMaterial, - study_dict: Dict[str, str], + assay_stream: Dict[str, str], return_multiple: bool = False, ) -> Union[str, List[str]]: """Gets Sample ids, an 'other material' is derived from. 
@@ -95,19 +95,18 @@ def get_derived_sample_alias( str: Resulting derived sample id or list of derived sample id's """ assoc_sample_ids = [] - for assay in study_dict["assays"]: - sample_associations = get_assay_sample_associations(assay) - for sa in sample_associations: - if clip_off_prefix(other_material.id) in clip_off_prefix(sa["output"]): - # sa["output"] => '#sample/' - # other_material.id => '#other_material/' - if return_multiple: - for input in sa["input"]: - alias = EnaSample.prefix + clip_off_prefix(input) - assoc_sample_ids.append(alias) - else: - input = sa["input"][0] - return EnaSample.prefix + clip_off_prefix(input) + sample_associations = get_assay_sample_associations(assay_stream) + for sa in sample_associations: + if clip_off_prefix(other_material.id) in clip_off_prefix(sa["output"]): + # sa["output"] => '#sample/' + # other_material.id => '#other_material/' + if return_multiple: + for input in sa["input"]: + alias = EnaSample.prefix + clip_off_prefix(input) + assoc_sample_ids.append(alias) + else: + input = sa["input"][0] + return EnaSample.prefix + clip_off_prefix(input) return assoc_sample_ids @@ -132,7 +131,9 @@ def fetch_parameters(protocol_dict: Dict[str, str]) -> List[Dict[str, str]]: return parameters -def get_parameter_values(study_dict: Dict[str, str]) -> Dict[str, str]: +def get_parameter_values( + assay_stream: Dict[str, str], study_protocols_dict: Dict[str, str] +) -> Dict[str, str]: """Returns all parameter values from a study dictionary. Args: @@ -142,17 +143,16 @@ def get_parameter_values(study_dict: Dict[str, str]) -> Dict[str, str]: Dict[str, str]: Resulting dictionary of parameter values. 
""" param_vals = [] - parameters = fetch_parameters(study_dict["protocols"]) - for assay in study_dict["assays"]: - for ps in assay["processSequence"]: - sample_id = clip_off_prefix(ps["@id"]) - parameter_values = [ - ParameterValue.from_dict(parameter_value, parameters) - for parameter_value in ps["parameterValues"] - ] - param_vals.append( - {"sample_id": sample_id, "parameter_values": parameter_values} - ) + parameters = fetch_parameters(study_protocols_dict) + for ps in assay_stream["processSequence"]: + sample_id = clip_off_prefix(ps["@id"]) + parameter_values = [ + ParameterValue.from_dict(parameter_value, parameters) + for parameter_value in ps["parameterValues"] + ] + param_vals.append( + {"sample_id": sample_id, "parameter_values": parameter_values} + ) return param_vals @@ -162,10 +162,10 @@ class EnaExperiment(IsaBase): """ mandatory_keys = [ - "protocols", - "materials", + "filename", + "measurementType", "processSequence", - "assays", + "comments", ] prefix = "https://datahub.elixir-belgium.org/samples/" # TODO: Replace by something less hard-coded @@ -203,7 +203,12 @@ def to_dict(self) -> Dict[str, str]: } @classmethod - def from_study_dict(self, study_dict: Dict[str, str], study_alias: str) -> None: + def from_assay_stream( + self, + assay_stream: Dict[str, str], + study_alias: str, + protocols_dict: Dict[str, str], + ) -> None: """Generates a EnaExperiment object from a study dictionary. 
Args: @@ -213,15 +218,15 @@ def from_study_dict(self, study_dict: Dict[str, str], study_alias: str) -> None: Returns: EnaExperiment: Resulting EnaExperiment object """ - super().check_dict_keys(study_dict, self.mandatory_keys) + super().check_dict_keys(assay_stream, self.mandatory_keys) - other_materials = get_other_materials(study_dict) - parameter_values = get_parameter_values(study_dict) + other_materials = get_other_materials(assay_stream) + parameter_values = get_parameter_values(assay_stream, protocols_dict) ena_experiments = [] for om in other_materials: om_id = clip_off_prefix(om.id) - s_alias = get_derived_sample_alias(om, study_dict) + s_alias = get_derived_sample_alias(om, assay_stream) filtered_parameter_vals = list( filter(lambda pv: pv["sample_id"] == om_id, parameter_values) ) diff --git a/ena_objects/ena_std_lib.py b/ena_objects/ena_std_lib.py index ee86521..4ccc03c 100644 --- a/ena_objects/ena_std_lib.py +++ b/ena_objects/ena_std_lib.py @@ -126,3 +126,13 @@ def fetch_assay_streams(study: Dict[str, str]) -> List[Dict[str, str]]: # assay_ids = [] # for study in dataset["studies"]: # assay_ids.append([assay["id"] for assay in study["assays"]]) +def study_publication_ids(publication_isa_json: Dict) -> List[int]: + """Retrieves the pubmed_ids from the ISA JSON + + Args: + publication_isa_json (Dict): Publication part of the ISA JSON dictionary + + Returns: + List[int]: List of pubmed ID's + """ + return ",".join([str(pub["pubMedID"]) for pub in publication_isa_json]) diff --git a/ena_objects/ena_study.py b/ena_objects/ena_study.py index 72566ce..6883080 100644 --- a/ena_objects/ena_study.py +++ b/ena_objects/ena_study.py @@ -12,30 +12,18 @@ ) -def study_publication_ids(publication_isa_json: Dict) -> List[int]: - """Retrieves the pubmed_ids from the ISA JSON +# def study_alias(assay_stream: Dict[str, str]) -> str: +# """Creates a study_alias, based on information of the assay stream and study of the ISA JSON. 
- Args: - publication_isa_json (Dict): Publication part of the ISA JSON dictionary +# Args: +# assay_stream Dict[str, str]: assay stream part of the ISA JSON +# seek_study_id str: Study ID - Returns: - List[int]: List of pubmed ID's - """ - return ",".join([str(pub["pubMedID"]) for pub in publication_isa_json]) - - -def study_alias(assay_stream: Dict[str, str], seek_study_id) -> str: - """Creates a study_alias, based on information of the assay stream and study of the ISA JSON. - - Args: - assay_stream Dict[str, str]: assay stream part of the ISA JSON - seek_study_id str: Study ID - - Returns: - str: the study_alias - """ - prefix = fetch_assay_comment_by_name(assay_stream, EnaStudy.prefix)["value"] - return prefix + seek_study_id +# Returns: +# str: the study_alias +# """ +# prefix = fetch_assay_comment_by_name(assay_stream, EnaStudy.prefix)["value"] +# return prefix + seek_study_id def study_title(assay_stream: Dict[str, str]) -> str: @@ -60,7 +48,7 @@ def study_abstract(assay_stream: Dict[str, str]) -> str: class EnaStudy(IsaBase): """Generates a Study object, compliant to the requirements of ENA""" - mandatory_keys = ["title", "description", "publications"] + mandatory_keys = ["filename", "comments", "materials"] prefix = "ena_study_alias_prefix" def __init__( @@ -69,9 +57,6 @@ def __init__( title: str, study_type: str, study_abstract: str, - samples: List[EnaSample], - experiments: List[EnaExperiment] = [], - runs: List[EnaRun] = [], new_study_type: Optional[str] = None, pubmed_id: Optional[str] = None, ) -> None: @@ -83,10 +68,6 @@ def __init__( self.new_study_type = new_study_type self.pubmed_id = pubmed_id - self.samples = samples - self.experiments = experiments - self.runs = runs - def to_dict(self): return { "alias": self.alias, @@ -98,7 +79,7 @@ def to_dict(self): } @classmethod - def from_isa_json(self, isa_json: Dict[str, str]): + def from_assay_stream(self, assay_stream: Dict[str, str], pubmed_ids): """Method that creates an EnaStudy with params from 
ISA JSON Dictionary Args: @@ -107,38 +88,22 @@ def from_isa_json(self, isa_json: Dict[str, str]): Returns: EnaStudy: EnaStudy object """ - super().check_dict_keys(isa_json, self.mandatory_keys) - - ena_studies = [] - for study in isa_json["studies"]: - assay_streams = fetch_assay_streams(study) - study_id = get_study_id(study) - for assay_stream in assay_streams: - current_study_alias = study_alias(assay_stream, study_id) - ena_studies.append( - EnaStudy( - alias=current_study_alias, - title=study_title(assay_stream), - study_type=study_type(assay_stream), - study_abstract=study_abstract(assay_stream), - new_study_type=new_study_type(assay_stream), - samples=EnaSample.from_study_dict(study), - experiments=EnaExperiment.from_study_dict( - study, current_study_alias - ), - runs=EnaRun.from_study_dict(assay_stream), - pubmed_id=study_publication_ids( - publication_isa_json=study["publications"] - ), - ) - ) - - return ena_studies - - def to_dataframe(self) -> DataFrame: - """Dumps the study object in a pandas DataFrame of the object + super().check_dict_keys(assay_stream, self.mandatory_keys) - Returns: - DataFrame: Pandas DataFrame representation of the Study - """ - return DataFrame.from_dict([self.to_dict()]) + return EnaStudy( + alias=assay_stream["@id"], + title=study_title(assay_stream), + study_type=study_type(assay_stream), + study_abstract=study_abstract(assay_stream), + new_study_type=new_study_type(assay_stream), + pubmed_id=pubmed_ids, + ) + + +def export_studies_to_dataframe(studies: List[EnaStudy]) -> DataFrame: + """Dumps the study objects in a pandas DataFrame + + Returns: + DataFrame: Pandas DataFrame representation of the Studies + """ + return DataFrame.from_dict([study.to_dict() for study in studies]) diff --git a/ena_objects/ena_submission.py b/ena_objects/ena_submission.py index b9db393..612caab 100644 --- a/ena_objects/ena_submission.py +++ b/ena_objects/ena_submission.py @@ -4,11 +4,12 @@ from pandas import DataFrame import pandas from 
ena_objects.characteristic import IsaBase -from ena_objects.ena_experiment import export_experiments_to_dataframe -from ena_objects.ena_run import export_runs_to_dataframe -from ena_objects.ena_sample import export_samples_to_dataframe +from ena_objects.ena_experiment import EnaExperiment, export_experiments_to_dataframe +from ena_objects.ena_run import EnaRun, export_runs_to_dataframe +from ena_objects.ena_sample import EnaSample, export_samples_to_dataframe +from ena_objects.ena_std_lib import fetch_assay_streams, study_publication_ids -from ena_objects.ena_study import EnaStudy +from ena_objects.ena_study import EnaStudy, export_studies_to_dataframe def merge_df_by_key( @@ -71,9 +72,15 @@ class EnaSubmission(IsaBase): def __init__( self, studies: List[EnaStudy] = [], + samples: List[EnaSample] = [], + experiments: List[EnaExperiment] = [], + runs: List[EnaRun] = [], ) -> None: super().__init__() self.studies = studies + self.samples = samples + self.experiments = experiments + self.runs = runs def from_isa_json( isa_json: Dict[str, str], required_assays: List[Dict[str, str]] @@ -87,8 +94,34 @@ def from_isa_json( EnaSubmission: resulting EnaSubmission """ filtered_isa_json: Dict[str, str] = filter_assays(isa_json, required_assays) + samples = [] + studies = [] + experiments = [] + runs = [] + for study in filtered_isa_json["studies"]: + [samples.append(sample) for sample in EnaSample.from_study_dict(study)] + + pubmed_ids = study_publication_ids( + publication_isa_json=study["publications"] + ) + current_study_protocols_dict = study["protocols"] + assay_streams = fetch_assay_streams(study) + for assay_stream in assay_streams: + study = EnaStudy.from_assay_stream(assay_stream, pubmed_ids) + studies.append(study) + + [ + experiments.append(experiment) + for experiment in EnaExperiment.from_assay_stream( + assay_stream, study.alias, current_study_protocols_dict + ) + ] - return EnaSubmission(studies=EnaStudy.from_isa_json(filtered_isa_json)) + [runs.append(run) for 
run in EnaRun.from_assay_stream(assay_stream)] + + return EnaSubmission( + studies=studies, samples=samples, experiments=experiments, runs=runs + ) def generate_dataframes(self) -> Dict[str, DataFrame]: """Generates all necessary DataFrames for the ENA Upload tool @@ -97,23 +130,9 @@ def generate_dataframes(self) -> Dict[str, DataFrame]: Returns: Dict[str, DataFrame]: resulting dictionary of DataFrames """ - dataframes = [] - for study in self.studies: - study_df = EnaStudy.to_dataframe(study) - samples_df = export_samples_to_dataframe(study.samples) - experiments_df = export_experiments_to_dataframe(study.experiments) - runs_df = export_runs_to_dataframe(study.runs) - dataframes.append( - { - "study_df": study_df, - "samples_df": samples_df, - "experiments_df": experiments_df, - "runs_df": runs_df, - } - ) return { - "study": merge_df_by_key(dataframes, "study_df"), - "samples": merge_df_by_key(dataframes, "samples_df"), - "experiments": merge_df_by_key(dataframes, "experiments_df"), - "runs": merge_df_by_key(dataframes, "runs_df"), + "study": export_studies_to_dataframe(self.studies), + "samples": export_samples_to_dataframe(self.samples), + "experiments": export_experiments_to_dataframe(self.experiments), + "runs": export_runs_to_dataframe(self.runs), } From eb10676741a1de983a2d20129c2171b7c055791c Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Thu, 12 Oct 2023 13:17:00 +0200 Subject: [PATCH 36/62] Prefix is fetched from custom metadata --- ena_objects/ena_experiment.py | 25 ++++++++++++++++--------- ena_objects/ena_run.py | 17 ++++++++++++----- ena_objects/ena_sample.py | 16 +++++++++------- ena_objects/ena_std_lib.py | 8 ++++++++ ena_objects/ena_study.py | 30 +++++++++++++----------------- ena_objects/ena_submission.py | 25 ------------------------- example_read_isa_json.py | 6 +++--- 7 files changed, 61 insertions(+), 66 deletions(-) diff --git a/ena_objects/ena_experiment.py b/ena_objects/ena_experiment.py index 01e0746..3092225 100644 --- 
a/ena_objects/ena_experiment.py +++ b/ena_objects/ena_experiment.py @@ -2,7 +2,11 @@ from pandas import DataFrame -from ena_objects.ena_std_lib import get_assay_sample_associations, clip_off_prefix +from ena_objects.ena_std_lib import ( + fetch_assay_comment_by_name, + get_assay_sample_associations, + clip_off_prefix, +) from ena_objects.characteristic import ( IsaBase, OtherMaterialCharacteristic, @@ -12,18 +16,19 @@ from ena_objects.other_material import OtherMaterial -def experiment_alias(other_material: OtherMaterial) -> str: +def experiment_alias(other_material: OtherMaterial, prefix: str) -> str: """Generates an alias for the experiment, starting from an other_material - and the prefix specified in the class. + and a prefix. Args: - other_material (OtherMaterial): _description_ + other_material (OtherMaterial): OtherMaterial object + prefix (str): Prefix Returns: - str: _description_ + str: experiment alias """ seek_assays_id: str = clip_off_prefix(other_material.id) - return EnaExperiment.prefix + seek_assays_id + return prefix + seek_assays_id def fetch_characteristic_categories( @@ -167,7 +172,7 @@ class EnaExperiment(IsaBase): "processSequence", "comments", ] - prefix = "https://datahub.elixir-belgium.org/samples/" # TODO: Replace by something less hard-coded + prefix = "ena_experiment_alias_prefix" def __init__( self, @@ -222,7 +227,9 @@ def from_assay_stream( other_materials = get_other_materials(assay_stream) parameter_values = get_parameter_values(assay_stream, protocols_dict) - + prefix = fetch_assay_comment_by_name(assay_stream, EnaExperiment.prefix)[ + "value" + ] ena_experiments = [] for om in other_materials: om_id = clip_off_prefix(om.id) @@ -238,7 +245,7 @@ def from_assay_stream( ena_experiments.append( EnaExperiment( - alias=experiment_alias(om), + alias=experiment_alias(om, prefix), library_name=om.name, study_alias=study_alias, sample_alias=s_alias, diff --git a/ena_objects/ena_run.py b/ena_objects/ena_run.py index 9ade897..0104e3c 100644 
--- a/ena_objects/ena_run.py +++ b/ena_objects/ena_run.py @@ -3,7 +3,11 @@ from pandas import DataFrame from ena_objects.characteristic import IsaBase -from ena_objects.ena_std_lib import get_assay_sample_associations, clip_off_prefix +from ena_objects.ena_std_lib import ( + fetch_assay_comment_by_name, + get_assay_sample_associations, + clip_off_prefix, +) class DataFileComment(IsaBase): @@ -94,17 +98,18 @@ def to_dict(self) -> Dict[str, str]: } -def fetch_run_alias(data_file: Dict[str, str]) -> str: +def run_alias(data_file: Dict[str, str], prefix: str) -> str: """Generates an alias for the run, based on the data file dictionary and prefix specified in the Class Args: data_file (Dict[str, str]): Input data file dictionary + prefix (str): prefix for alias Returns: str: Resulting alias """ - return EnaRun.prefix + clip_off_prefix(data_file["@id"]) + return prefix + clip_off_prefix(data_file["@id"]) def get_derived_expertiment_id( @@ -142,7 +147,7 @@ class EnaRun(IsaBase): """ mandatory_keys = ["dataFiles", "processSequence"] - prefix = "https://datahub.elixir-belgium.org/samples/" # TODO: Replace by something less hard-coded + prefix = "ena_run_alias_prefix" def __init__( self, @@ -169,13 +174,15 @@ def from_assay_stream(self, assay_stream: Dict[str, str]) -> None: super().check_dict_keys(assay_stream, self.mandatory_keys) sample_datafile_associations = get_assay_sample_associations(assay_stream) + prefix = fetch_assay_comment_by_name(assay_stream, EnaRun.prefix)["value"] + for data_file in assay_stream["dataFiles"]: current_data_file = DataFile.from_data_file_dict( data_file, sample_datafile_associations ) ena_runs.append( EnaRun( - alias=fetch_run_alias(data_file), + alias=run_alias(data_file, prefix), experiment_alias=fetch_experiment_alias(current_data_file), data_file=current_data_file, ) diff --git a/ena_objects/ena_sample.py b/ena_objects/ena_sample.py index c33df99..8248878 100644 --- a/ena_objects/ena_sample.py +++ b/ena_objects/ena_sample.py @@ -1,9 +1,10 
@@ -import re from typing import List, Dict from ena_objects.characteristic import SampleCharacteristic from pandas import DataFrame +from ena_objects.ena_std_lib import clip_off_prefix, fetch_study_comment_by_name + def fetch_characteristic_categories(study_dict: Dict) -> Dict: """Retrieves the name of a characteristic id @@ -72,7 +73,7 @@ def associated_source_characteristics(sources_data: Dict, ids: List[str]) -> Dic return sd["characteristics"] -def sample_alias(id: str) -> str: +def sample_alias(id: str, prefix) -> str: """Retrieves the sample's alias Args: @@ -81,9 +82,7 @@ def sample_alias(id: str) -> str: Returns: str: Unique string representation of the alias """ - - sample_id = re.split("/", id)[1] - return EnaSample.prefix + sample_id + return prefix + clip_off_prefix(id) class EnaSample: @@ -91,7 +90,7 @@ class EnaSample: Generates an Sample object, compliant to the requirements of ENA """ - prefix: str = "https://datahub.elixir-belgium.org/samples/" # TODO: Replace by something less hard-coded + prefix: str = "ena_sample_alias_prefix" def __init__(self, characteristics: List[SampleCharacteristic], alias: str) -> None: self.alias = alias @@ -136,9 +135,12 @@ def from_study_dict(self, study_dict: Dict) -> None: for sc in associated_source_characteristics(sources_data, sd["source"]): sd["characteristics"].append(sc) + study_alias_prefix = fetch_study_comment_by_name(study_dict, self.prefix)[ + "value" + ] return [ EnaSample( - alias=sample_alias(sd["id"]), + alias=sample_alias(sd["id"], study_alias_prefix), characteristics=sd["characteristics"], ) for sd in samples_data diff --git a/ena_objects/ena_std_lib.py b/ena_objects/ena_std_lib.py index 4ccc03c..7e408f7 100644 --- a/ena_objects/ena_std_lib.py +++ b/ena_objects/ena_std_lib.py @@ -118,6 +118,14 @@ def fetch_assay_comment_by_name( return comment +def fetch_study_comment_by_name( + study_dict: Dict[str, str], comment_name: str +) -> Dict[str, str]: + for comment in study_dict["comments"]: + if 
comment["name"] == comment_name: + return comment + + def fetch_assay_streams(study: Dict[str, str]) -> List[Dict[str, str]]: return [assay for assay in study["assays"]] diff --git a/ena_objects/ena_study.py b/ena_objects/ena_study.py index 6883080..4c6fc70 100644 --- a/ena_objects/ena_study.py +++ b/ena_objects/ena_study.py @@ -1,29 +1,25 @@ -from operator import index from typing import List, Optional, Dict from pandas import DataFrame from ena_objects.characteristic import IsaBase -from ena_objects.ena_experiment import EnaExperiment -from ena_objects.ena_run import EnaRun -from ena_objects.ena_sample import EnaSample from ena_objects.ena_std_lib import ( - fetch_assay_streams, + clip_off_prefix, fetch_assay_comment_by_name, - get_study_id, ) -# def study_alias(assay_stream: Dict[str, str]) -> str: -# """Creates a study_alias, based on information of the assay stream and study of the ISA JSON. +def study_alias(assay_stream: Dict[str, str]) -> str: + """Creates a study_alias, based on information of the assay stream and study of the ISA JSON. 
-# Args: -# assay_stream Dict[str, str]: assay stream part of the ISA JSON -# seek_study_id str: Study ID + Args: + assay_stream Dict[str, str]: assay stream part of the ISA JSON + seek_study_id str: Study ID -# Returns: -# str: the study_alias -# """ -# prefix = fetch_assay_comment_by_name(assay_stream, EnaStudy.prefix)["value"] -# return prefix + seek_study_id + Returns: + str: the study_alias + """ + assay_stream_id = clip_off_prefix(assay_stream["@id"]) + prefix = fetch_assay_comment_by_name(assay_stream, EnaStudy.prefix)["value"] + return prefix + assay_stream_id def study_title(assay_stream: Dict[str, str]) -> str: @@ -91,7 +87,7 @@ def from_assay_stream(self, assay_stream: Dict[str, str], pubmed_ids): super().check_dict_keys(assay_stream, self.mandatory_keys) return EnaStudy( - alias=assay_stream["@id"], + alias=study_alias(assay_stream), title=study_title(assay_stream), study_type=study_type(assay_stream), study_abstract=study_abstract(assay_stream), diff --git a/ena_objects/ena_submission.py b/ena_objects/ena_submission.py index 612caab..2daf6d9 100644 --- a/ena_objects/ena_submission.py +++ b/ena_objects/ena_submission.py @@ -1,5 +1,4 @@ from typing import List, Dict -from numpy import append from pandas import DataFrame import pandas @@ -12,30 +11,6 @@ from ena_objects.ena_study import EnaStudy, export_studies_to_dataframe -def merge_df_by_key( - dataframe_dict_list: List[Dict[str, DataFrame]], key: str -) -> DataFrame: - """Filters a list of pandas DataFrames on the provided key and merges them by row. 
- - Args: - dataframe_dict_list (List[Dict[str, DataFrame]]): list of dictionary, containing the DataFrames - key (str): key to filter the list on - - Returns: - DataFrame: resulting DataFrame - """ - filtered_list = list(map(lambda d: d[key], dataframe_dict_list)) - return pandas.concat(filtered_list) - - -def fetch_ena_studies(isa_json: Dict[str, str]) -> Dict[str, str]: - ena_studies = [] - for study in isa_json["studies"]: - for assay in study["assays"]: - ena_studies.append(assay) - return ena_studies - - def fetch_assay(assay, required_assays): for ra in required_assays: for key, value in ra.items(): diff --git a/example_read_isa_json.py b/example_read_isa_json.py index 8d381fa..dbc5854 100644 --- a/example_read_isa_json.py +++ b/example_read_isa_json.py @@ -9,12 +9,12 @@ isa_json = json.load(isa_json_file) # Change this to 'True' if you want to export the resulting DataFrames to an xlsx. -export_to_excel = False +export_to_excel = True outputfolder = "./output_folder/" required_assays = [ - {"assay_stream": "Assay Stream 1"}, - {"ena_study_title": "ENA Study 2"}, + {"assay_stream": "Ena stream 1"}, + {"ena_study_title": "Ena Study 2"}, ] submission = EnaSubmission.from_isa_json(isa_json, required_assays) From ce88034c43aafb3eefd9d75ac268e69685958ea9 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 13 Oct 2023 09:00:03 +0200 Subject: [PATCH 37/62] Sanitize samples + fix sample_alias in experiments --- ena_objects/ena_experiment.py | 8 +++++--- ena_objects/ena_submission.py | 38 ++++++++++++++++++++++++++++++----- 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/ena_objects/ena_experiment.py b/ena_objects/ena_experiment.py index 3092225..2be7403 100644 --- a/ena_objects/ena_experiment.py +++ b/ena_objects/ena_experiment.py @@ -87,6 +87,7 @@ def library_names(study_dict: Dict[str, str]) -> List[str]: def get_derived_sample_alias( other_material: OtherMaterial, assay_stream: Dict[str, str], + sample_alias_prefix: str, return_multiple: 
bool = False, ) -> Union[str, List[str]]: """Gets Sample ids, an 'other material' is derived from. @@ -107,11 +108,11 @@ def get_derived_sample_alias( # other_material.id => '#other_material/' if return_multiple: for input in sa["input"]: - alias = EnaSample.prefix + clip_off_prefix(input) + alias = sample_alias_prefix + clip_off_prefix(input) assoc_sample_ids.append(alias) else: input = sa["input"][0] - return EnaSample.prefix + clip_off_prefix(input) + return sample_alias_prefix + clip_off_prefix(input) return assoc_sample_ids @@ -212,6 +213,7 @@ def from_assay_stream( self, assay_stream: Dict[str, str], study_alias: str, + sample_alias_prefix: str, protocols_dict: Dict[str, str], ) -> None: """Generates a EnaExperiment object from a study dictionary. @@ -233,7 +235,7 @@ def from_assay_stream( ena_experiments = [] for om in other_materials: om_id = clip_off_prefix(om.id) - s_alias = get_derived_sample_alias(om, assay_stream) + s_alias = get_derived_sample_alias(om, assay_stream, sample_alias_prefix) filtered_parameter_vals = list( filter(lambda pv: pv["sample_id"] == om_id, parameter_values) ) diff --git a/ena_objects/ena_submission.py b/ena_objects/ena_submission.py index 2daf6d9..a591d36 100644 --- a/ena_objects/ena_submission.py +++ b/ena_objects/ena_submission.py @@ -1,12 +1,18 @@ from typing import List, Dict from pandas import DataFrame -import pandas from ena_objects.characteristic import IsaBase -from ena_objects.ena_experiment import EnaExperiment, export_experiments_to_dataframe +from ena_objects.ena_experiment import ( + EnaExperiment, + export_experiments_to_dataframe, +) from ena_objects.ena_run import EnaRun, export_runs_to_dataframe from ena_objects.ena_sample import EnaSample, export_samples_to_dataframe -from ena_objects.ena_std_lib import fetch_assay_streams, study_publication_ids +from ena_objects.ena_std_lib import ( + fetch_assay_streams, + fetch_study_comment_by_name, + study_publication_ids, +) from ena_objects.ena_study import EnaStudy, 
export_studies_to_dataframe @@ -81,6 +87,9 @@ def from_isa_json( ) current_study_protocols_dict = study["protocols"] assay_streams = fetch_assay_streams(study) + ena_sample_alias_prefix = fetch_study_comment_by_name( + study, EnaSample.prefix + )["value"] for assay_stream in assay_streams: study = EnaStudy.from_assay_stream(assay_stream, pubmed_ids) studies.append(study) @@ -88,15 +97,34 @@ def from_isa_json( [ experiments.append(experiment) for experiment in EnaExperiment.from_assay_stream( - assay_stream, study.alias, current_study_protocols_dict + assay_stream, + study.alias, + ena_sample_alias_prefix, + current_study_protocols_dict, ) ] [runs.append(run) for run in EnaRun.from_assay_stream(assay_stream)] - return EnaSubmission( + ena_submission = EnaSubmission( studies=studies, samples=samples, experiments=experiments, runs=runs ) + ena_submission.sanitize_samples() + return ena_submission + + def sanitize_samples(self): + unused_samples = [] + + for sample in self.samples: + experiment_sample_aliases = [ + experiment.sample_alias for experiment in self.experiments + ] + if sample.alias not in experiment_sample_aliases: + unused_samples.append(sample.alias) + + self.samples = [ + sample for sample in self.samples if sample.alias not in unused_samples + ] def generate_dataframes(self) -> Dict[str, DataFrame]: """Generates all necessary DataFrames for the ENA Upload tool From a5431e3a984e1a9cda3fe27061e5c9d1133f0824 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 13 Oct 2023 09:00:43 +0200 Subject: [PATCH 38/62] clean up --- ena_objects/ena_std_lib.py | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/ena_objects/ena_std_lib.py b/ena_objects/ena_std_lib.py index 7e408f7..5cd56c8 100644 --- a/ena_objects/ena_std_lib.py +++ b/ena_objects/ena_std_lib.py @@ -88,28 +88,6 @@ def get_study_id(study_dict: Dict[str, str]) -> str: return study_comment["value"] -# def fetch_requested_studies( -# studies_isa_json: Dict[str, str], 
dataset: Dict[str, str] -# ) -> List[Dict[str, str]]: -# """Fetches the requested studies by cross-matching the studies in the dataset - -# Args: -# studies_isa_json (Dict[str, str]): studies dictionary -# dataset (Dict[str, str]): dataset dictionary - -# Returns: -# List[Dict[str, str]]: Resulting list of study dictionaries -# """ -# requested_study_ids = [study["id"] for study in dataset["studies"]] - -# studies = [] -# for study in studies_isa_json: -# if get_study_id(study) in requested_study_ids: -# studies.append(study) - -# return studies - - def fetch_assay_comment_by_name( assay_stream: Dict[str, str], comment_name: str ) -> Dict[str, str]: @@ -130,10 +108,6 @@ def fetch_assay_streams(study: Dict[str, str]) -> List[Dict[str, str]]: return [assay for assay in study["assays"]] -# def fetch_requested_assay_ids(dataset: Dict[str, str]) -> List[str]: -# assay_ids = [] -# for study in dataset["studies"]: -# assay_ids.append([assay["id"] for assay in study["assays"]]) def study_publication_ids(publication_isa_json: Dict) -> List[int]: """Retrieves the pubmed_ids from the ISA JSON From a590710f2710e7d6e7dd3edfbebe760cf9796386 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 13 Oct 2023 16:45:25 +0200 Subject: [PATCH 39/62] Replace simple dictionary validation by extensive JSON schema validation --- ena_objects/characteristic.py | 19 +- ena_objects/ena_experiment.py | 7 - ena_objects/ena_run.py | 8 - ena_objects/ena_std_lib.py | 14 - ena_objects/ena_study.py | 3 - ena_objects/ena_submission.py | 8 + ena_objects/json_schemas/assay_schema.json | 88 + ena_objects/json_schemas/comment_schema.json | 16 + ena_objects/json_schemas/data_schema.json | 42 + ena_objects/json_schemas/factor_schema.json | 26 + .../json_schemas/factor_value_schema.json | 38 + .../json_schemas/investigation_schema.json | 82 + .../material_attribute_schema.json | 16 + .../material_attribute_value_schema.json | 38 + ena_objects/json_schemas/material_schema.json | 41 + 
.../ontology_annotation_schema.json | 38 + .../ontology_source_reference_schema.json | 28 + .../json_schemas/organization_schema.json | 12 + ena_objects/json_schemas/person_schema.json | 50 + .../process_parameter_value_schema.json | 34 + ena_objects/json_schemas/process_schema.json | 87 + .../protocol_parameter_schema.json | 22 + ena_objects/json_schemas/protocol_schema.json | 62 + .../json_schemas/publication_schema.json | 31 + ena_objects/json_schemas/sample_schema.json | 40 + ena_objects/json_schemas/source_schema.json | 27 + ena_objects/json_schemas/study_schema.json | 141 + ena_objects/other_material.py | 3 - requirements.txt | 1 + .../test_data/bad_assay_stream_isa_json.json | 5067 ++++++++++++++++ .../test_data/bad_investigation_isa_json.json | 55 + .../isa_json_test_investigation.json | 2536 -------- ...tudy_multi_assay_stream_investigation.json | 5131 +++++++++++++++++ tests/test_ena_objects.py | 55 +- 34 files changed, 11271 insertions(+), 2595 deletions(-) create mode 100644 ena_objects/json_schemas/assay_schema.json create mode 100644 ena_objects/json_schemas/comment_schema.json create mode 100644 ena_objects/json_schemas/data_schema.json create mode 100644 ena_objects/json_schemas/factor_schema.json create mode 100644 ena_objects/json_schemas/factor_value_schema.json create mode 100644 ena_objects/json_schemas/investigation_schema.json create mode 100644 ena_objects/json_schemas/material_attribute_schema.json create mode 100644 ena_objects/json_schemas/material_attribute_value_schema.json create mode 100644 ena_objects/json_schemas/material_schema.json create mode 100644 ena_objects/json_schemas/ontology_annotation_schema.json create mode 100644 ena_objects/json_schemas/ontology_source_reference_schema.json create mode 100644 ena_objects/json_schemas/organization_schema.json create mode 100644 ena_objects/json_schemas/person_schema.json create mode 100644 ena_objects/json_schemas/process_parameter_value_schema.json create mode 100644 
ena_objects/json_schemas/process_schema.json create mode 100644 ena_objects/json_schemas/protocol_parameter_schema.json create mode 100644 ena_objects/json_schemas/protocol_schema.json create mode 100644 ena_objects/json_schemas/publication_schema.json create mode 100644 ena_objects/json_schemas/sample_schema.json create mode 100644 ena_objects/json_schemas/source_schema.json create mode 100644 ena_objects/json_schemas/study_schema.json create mode 100644 tests/test_data/bad_assay_stream_isa_json.json create mode 100644 tests/test_data/bad_investigation_isa_json.json delete mode 100644 tests/test_data/isa_json_test_investigation.json create mode 100644 tests/test_data/multi_study_multi_assay_stream_investigation.json diff --git a/ena_objects/characteristic.py b/ena_objects/characteristic.py index 9608e56..2fb22bf 100644 --- a/ena_objects/characteristic.py +++ b/ena_objects/characteristic.py @@ -1,6 +1,7 @@ +import json +import os from typing import List, Dict - -from ena_objects.ena_std_lib import validate_dict +import jsonschema class IsaBase: @@ -9,8 +10,16 @@ class IsaBase: """ @classmethod - def check_dict_keys(self, dict: Dict[str, str], mandatory_keys): - [validate_dict(dict=dict, key=key) for key in mandatory_keys] + def validate_json(self, isa_json: Dict[str, str], schema): + schema_path = os.path.join(os.curdir, "ena_objects", "json_schemas", schema) + + json_file = open(schema_path) + json_schema = json.load(json_file) + + jsonschema.validate( + isa_json, + json_schema, + ) def fetch_category_name(categories: Dict[str, str], name: str) -> str: @@ -42,7 +51,6 @@ class Characteristic(IsaBase): This is the generic base class of a characteristics object. 
""" - mandatory_keys = ["category", "value", "unit"] parameters = [] def __init__(self, category: Dict, value: str) -> None: @@ -60,7 +68,6 @@ def from_dict(self, dict: Dict[str, str], categories: List[Dict[str, str]]) -> N Returns: Characteristic: _description_ """ - super().check_dict_keys(dict, self.mandatory_keys) return self( category=category_dict(dict["category"], categories), value=dict["value"]["annotationValue"], diff --git a/ena_objects/ena_experiment.py b/ena_objects/ena_experiment.py index 2be7403..cdc7ac6 100644 --- a/ena_objects/ena_experiment.py +++ b/ena_objects/ena_experiment.py @@ -167,12 +167,6 @@ class EnaExperiment(IsaBase): Generates an Experiment object, compliant to the requirements of ENA """ - mandatory_keys = [ - "filename", - "measurementType", - "processSequence", - "comments", - ] prefix = "ena_experiment_alias_prefix" def __init__( @@ -225,7 +219,6 @@ def from_assay_stream( Returns: EnaExperiment: Resulting EnaExperiment object """ - super().check_dict_keys(assay_stream, self.mandatory_keys) other_materials = get_other_materials(assay_stream) parameter_values = get_parameter_values(assay_stream, protocols_dict) diff --git a/ena_objects/ena_run.py b/ena_objects/ena_run.py index 0104e3c..058bf29 100644 --- a/ena_objects/ena_run.py +++ b/ena_objects/ena_run.py @@ -32,9 +32,6 @@ def from_dict(self, comments_dict: Dict[str, str]) -> None: Returns: DataFileComment: Resulting DataFileComment """ - for comment in comments_dict: - super().check_dict_keys(dict=comment, mandatory_keys=self.mandatory_keys) - return [ DataFileComment(name=comment["name"], value=comment["value"]) for comment in comments_dict @@ -49,8 +46,6 @@ class DataFile(IsaBase): Extends the IsaBase class. 
""" - mandatory_keys = ["@id", "name", "type", "comments"] - def __init__(self, id, name, type, comments, derived_experiment_id) -> None: super().__init__() self.id: str = id @@ -72,7 +67,6 @@ def from_data_file_dict( Returns: DataFile: Resulting DataFile """ - super().check_dict_keys(data_file_dict, self.mandatory_keys) return DataFile( id=data_file_dict["@id"], name=data_file_dict["name"], @@ -146,7 +140,6 @@ class EnaRun(IsaBase): Generates a Run object, compliant to the requirements of ENA. """ - mandatory_keys = ["dataFiles", "processSequence"] prefix = "ena_run_alias_prefix" def __init__( @@ -172,7 +165,6 @@ def from_assay_stream(self, assay_stream: Dict[str, str]) -> None: """ ena_runs = [] - super().check_dict_keys(assay_stream, self.mandatory_keys) sample_datafile_associations = get_assay_sample_associations(assay_stream) prefix = fetch_assay_comment_by_name(assay_stream, EnaRun.prefix)["value"] diff --git a/ena_objects/ena_std_lib.py b/ena_objects/ena_std_lib.py index 5cd56c8..0448bd4 100644 --- a/ena_objects/ena_std_lib.py +++ b/ena_objects/ena_std_lib.py @@ -2,20 +2,6 @@ import re -def validate_dict(dict: Dict, key: str) -> None: - """Raises an error if the structure of the ISA JSON Dict is not conform - - Args: - isa_json (Dict): The ISA JSON to validate - key (str): The key to check - - Raises: - KeyError: Will display the missing key in the Dict - """ - if key not in dict.keys(): - raise KeyError(f"{key} was not found in the provided ISA JSON.") - - def get_assay_sample_associations(assay_dict: Dict[str, str]) -> List[Dict[str, str]]: """Fetches the list of sample assocations in a specified assay dictionary. Each dictionary contains a list of input ids and output ids. 
diff --git a/ena_objects/ena_study.py b/ena_objects/ena_study.py index 4c6fc70..e589a71 100644 --- a/ena_objects/ena_study.py +++ b/ena_objects/ena_study.py @@ -44,7 +44,6 @@ def study_abstract(assay_stream: Dict[str, str]) -> str: class EnaStudy(IsaBase): """Generates a Study object, compliant to the requirements of ENA""" - mandatory_keys = ["filename", "comments", "materials"] prefix = "ena_study_alias_prefix" def __init__( @@ -84,8 +83,6 @@ def from_assay_stream(self, assay_stream: Dict[str, str], pubmed_ids): Returns: EnaStudy: EnaStudy object """ - super().check_dict_keys(assay_stream, self.mandatory_keys) - return EnaStudy( alias=study_alias(assay_stream), title=study_title(assay_stream), diff --git a/ena_objects/ena_submission.py b/ena_objects/ena_submission.py index a591d36..01f3340 100644 --- a/ena_objects/ena_submission.py +++ b/ena_objects/ena_submission.py @@ -45,11 +45,17 @@ def filter_assays( return new_isa_json +def validate_isa_json(isa_json: Dict[str, str]): + IsaBase.validate_json(isa_json, EnaSubmission.investigation_schema) + + class EnaSubmission(IsaBase): """ Wrapper objects, holding studies """ + investigation_schema = "investigation_schema.json" + def __init__( self, studies: List[EnaStudy] = [], @@ -74,6 +80,8 @@ def from_isa_json( Returns: EnaSubmission: resulting EnaSubmission """ + validate_isa_json(isa_json) + filtered_isa_json: Dict[str, str] = filter_assays(isa_json, required_assays) samples = [] studies = [] diff --git a/ena_objects/json_schemas/assay_schema.json b/ena_objects/json_schemas/assay_schema.json new file mode 100644 index 0000000..fd82924 --- /dev/null +++ b/ena_objects/json_schemas/assay_schema.json @@ -0,0 +1,88 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "title": "Assay JSON Schema", + "name": "Assay JSON Schema", + "description": "JSON Schema describing an Assay", + "@context": { + "@base": "http://purl.org/isaterms/", + "xsd": "http://www.w3.org/2001/XMLSchema#" + }, + "type": "object", + 
"properties": { + "@id": { + "type": "string", + "format": "uri" + }, + "filename": { + "type": "string" + }, + "measurementType": { + "$dynamicRef": "ontology_annotation_schema.json#" + }, + "technologyType": { + "type": "object", + "properties": { + "ontologyAnnotation": { + "$dynamicRef": "ontology_annotation_schema.json#" + } + } + }, + "technologyPlatform": { + "type": "string" + }, + "dataFiles": { + "type": "array", + "items": { + "$dynamicRef": "data_schema.json#" + } + }, + "materials": { + "type": "object", + "properties": { + "samples": { + "type": "array", + "items": { + "$dynamicRef": "sample_schema.json#" + } + }, + "otherMaterials": { + "type": "array", + "items": { + "$dynamicRef": "material_schema.json#" + } + } + } + }, + "characteristicCategories": { + "description": "List of all the characteristics categories (or material attributes) defined in the study, used to avoid duplication of their declaration when each material_attribute_value is created. ", + "type": "array", + "items": { + "$dynamicRef": "material_attribute_schema.json#" + } + }, + "unitCategories": { + "description": "List of all the unitsdefined in the study, used to avoid duplication of their declaration when each value is created. 
", + "type": "array", + "items": { + "$dynamicRef": "ontology_annotation_schema.json#" + } + }, + "processSequence": { + "type": "array", + "items": { + "$dynamicRef": "process_schema.json#" + } + }, + "comments": { + "type": "array", + "items": { + "$dynamicRef": "comment_schema.json#" + } + } + }, + "additionalProperties": false, + "required": [ + "processSequence", + "dataFiles" + ] +} \ No newline at end of file diff --git a/ena_objects/json_schemas/comment_schema.json b/ena_objects/json_schemas/comment_schema.json new file mode 100644 index 0000000..1ff9c12 --- /dev/null +++ b/ena_objects/json_schemas/comment_schema.json @@ -0,0 +1,16 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema", + "title": "ISA comment schema - it corresponds to ISA Comment[] construct", + "description": "JSON-schema representing a comment in the ISA model", + "type": "object", + "properties": { + "@id": { "type": "string", "format": "uri" }, + "name": { + "type": "string" + }, + "value": { + "type": "string" + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/ena_objects/json_schemas/data_schema.json b/ena_objects/json_schemas/data_schema.json new file mode 100644 index 0000000..3b6b483 --- /dev/null +++ b/ena_objects/json_schemas/data_schema.json @@ -0,0 +1,42 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema", + "title": "ISA data schema", + "description": "JSON-schema representing a data file in the ISA model", + "type": "object", + "properties": { + "@id": { + "type": "string", + "format": "uri" + }, + "name": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "Raw Data File", + "Derived Data File", + "Image File", + "Acquisition Parameter Data File", + "Derived Spectral Data File", + "Protein Assignment File", + "Raw Spectral Data File", + "Peptide Assignment File", + "Array Data File", + "Derived Array Data File", + "Post Translational Modification Assignment File", + "Derived Array Data Matrix File", + "Free 
Induction Decay Data File", + "Metabolite Assignment File", + "Array Data Matrix File" + ] + }, + "comments": { + "type": "array", + "items": { + "$dynamicRef": "comment_schema.json#" + } + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/ena_objects/json_schemas/factor_schema.json b/ena_objects/json_schemas/factor_schema.json new file mode 100644 index 0000000..c63abe2 --- /dev/null +++ b/ena_objects/json_schemas/factor_schema.json @@ -0,0 +1,26 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema", + "title": "ISA factor schema", + "name": "ISA factor schema", + "description": "JSON-schema representing a factor value in the ISA model", + "type": "object", + "properties": { + "@id": { + "type": "string", + "format": "uri" + }, + "factorName": { + "type": "string" + }, + "factorType": { + "$dynamicRef": "ontology_annotation_schema.json#" + }, + "comments": { + "type": "array", + "items": { + "$dynamicRef": "comment_schema.json#" + } + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/ena_objects/json_schemas/factor_value_schema.json b/ena_objects/json_schemas/factor_value_schema.json new file mode 100644 index 0000000..ea4ec87 --- /dev/null +++ b/ena_objects/json_schemas/factor_value_schema.json @@ -0,0 +1,38 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema", + "title": "ISA factor value schema", + "description": "JSON-schema representing a factor value in the ISA model", + "type": "object", + "properties": { + "@id": { + "type": "string", + "format": "uri" + }, + "category": { + "$dynamicRef": "factor_schema.json#" + }, + "value": { + "anyOf": [ + { + "$dynamicRef": "ontology_annotation_schema.json#" + }, + { + "type": "string" + }, + { + "type": "number" + } + ] + }, + "unit": { + "$dynamicRef": "ontology_annotation_schema.json#" + }, + "comments": { + "type": "array", + "items": { + "$dynamicRef": "comment_schema.json#" + } + } + }, + "additionalProperties": false +} \ No newline 
at end of file diff --git a/ena_objects/json_schemas/investigation_schema.json b/ena_objects/json_schemas/investigation_schema.json new file mode 100644 index 0000000..990236f --- /dev/null +++ b/ena_objects/json_schemas/investigation_schema.json @@ -0,0 +1,82 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema", + "title": "ISA investigation schema", + "description": "JSON-schema representing an investigation in the ISA model", + "type": "object", + "properties": { + "@id": { + "type": "string", + "format": "uri" + }, + "filename": { + "type": "string" + }, + "identifier": { + "type": "string" + }, + "title": { + "type": "string" + }, + "description": { + "type": "string" + }, + "submissionDate": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "string", + "maxLength": 0 + } + ] + }, + "publicReleaseDate": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "string", + "maxLength": 0 + } + ] + }, + "ontologySourceReferences": { + "type": "array", + "items": { + "$dynamicRef": "ontology_source_reference_schema.json#" + } + }, + "publications": { + "type": "array", + "items": { + "$dynamicRef": "publication_schema.json#" + } + }, + "people": { + "type": "array", + "items": { + "$dynamicRef": "person_schema.json#" + } + }, + "studies": { + "type": "array", + "items": { + "$dynamicRef": "study_schema.json#" + } + }, + "comments": { + "type": "array", + "items": { + "$dynamicRef": "comment_schema.json#" + } + } + }, + "additionalProperties": false, + "required": [ + "studies" + ] +} \ No newline at end of file diff --git a/ena_objects/json_schemas/material_attribute_schema.json b/ena_objects/json_schemas/material_attribute_schema.json new file mode 100644 index 0000000..fbd9fd0 --- /dev/null +++ b/ena_objects/json_schemas/material_attribute_schema.json @@ -0,0 +1,16 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema", + "title": "ISA material attribute schema", + "description": "JSON-schema 
representing a characteristics category (what appears between the brackets in Charactersitics[]) in the ISA model", + "type": "object", + "properties": { + "@id": { + "type": "string", + "format": "uri" + }, + "characteristicType": { + "$dynamicRef": "ontology_annotation_schema.json#" + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/ena_objects/json_schemas/material_attribute_value_schema.json b/ena_objects/json_schemas/material_attribute_value_schema.json new file mode 100644 index 0000000..0cc8956 --- /dev/null +++ b/ena_objects/json_schemas/material_attribute_value_schema.json @@ -0,0 +1,38 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema", + "title": "ISA material attribute schema", + "description": "JSON-schema representing a material attribute (or characteristic) value in the ISA model", + "type": "object", + "properties": { + "@id": { + "type": "string", + "format": "uri" + }, + "category": { + "$dynamicRef": "material_attribute_schema.json#" + }, + "value": { + "anyOf": [ + { + "$dynamicRef": "ontology_annotation_schema.json#" + }, + { + "type": "string" + }, + { + "type": "number" + } + ] + }, + "unit": { + "$dynamicRef": "ontology_annotation_schema.json#" + }, + "comments": { + "type": "array", + "items": { + "$dynamicRef": "comment_schema.json#" + } + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/ena_objects/json_schemas/material_schema.json b/ena_objects/json_schemas/material_schema.json new file mode 100644 index 0000000..195ae61 --- /dev/null +++ b/ena_objects/json_schemas/material_schema.json @@ -0,0 +1,41 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema", + "title": "ISA material node schema", + "description": "JSON-schema representing a material node in the ISA model, which is not a source or a sample (as they have specific schemas) - this will correspond to 'Extract Name', 'Labeled Extract Name'", + "type": "object", + "properties": { + "@id": { + "type": 
"string", + "format": "uri" + }, + "name": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "Extract Name", + "Labeled Extract Name" + ] + }, + "characteristics": { + "type": "array", + "items": { + "$dynamicRef": "material_attribute_value_schema.json#" + } + }, + "derivesFrom": { + "type": "array", + "items": { + "$dynamicRef": "material_schema.json#" + } + }, + "comments": { + "type": "array", + "items": { + "$dynamicRef": "comment_schema.json#" + } + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/ena_objects/json_schemas/ontology_annotation_schema.json b/ena_objects/json_schemas/ontology_annotation_schema.json new file mode 100644 index 0000000..988a6d5 --- /dev/null +++ b/ena_objects/json_schemas/ontology_annotation_schema.json @@ -0,0 +1,38 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema", + "title": "ISA ontology reference schema", + "name": "ISA ontology reference schema", + "description": "JSON-schema representing an ontology reference or annotation in the ISA model (for fields that are required to be ontology annotations)", + "type": "object", + "properties": { + "@id": { + "type": "string", + "format": "uri" + }, + "annotationValue": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "number" + } + ] + }, + "termSource": { + "type": "string", + "description": "The abbreviated ontology name. It should correspond to one of the sources as specified in the ontologySourceReference section of the Investigation." 
+ }, + "termAccession": { + "type": "string", + "format": "uri" + }, + "comments": { + "type": "array", + "items": { + "$dynamicRef": "comment_schema.json#" + } + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/ena_objects/json_schemas/ontology_source_reference_schema.json b/ena_objects/json_schemas/ontology_source_reference_schema.json new file mode 100644 index 0000000..43666aa --- /dev/null +++ b/ena_objects/json_schemas/ontology_source_reference_schema.json @@ -0,0 +1,28 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema", + "title": "ISA ontology source reference schema", + "name": "ISA ontology source reference schema", + "description": "JSON-schema representing an ontology reference in the ISA model", + "type": "object", + "properties": { + "description": { + "type": "string" + }, + "file": { + "type": "string" + }, + "name": { + "type": "string" + }, + "version": { + "type": "string" + }, + "comments": { + "type": "array", + "items": { + "$dynamicRef": "comment_schema.json#" + } + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/ena_objects/json_schemas/organization_schema.json b/ena_objects/json_schemas/organization_schema.json new file mode 100644 index 0000000..e71aab1 --- /dev/null +++ b/ena_objects/json_schemas/organization_schema.json @@ -0,0 +1,12 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema", + "title" : "ISA organization schema", + "description" : "JSON-schema representing an organization in the ISA model v1.0", + "type" : "object", + "properties" : { + "name" : { + "type" : "string" + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/ena_objects/json_schemas/person_schema.json b/ena_objects/json_schemas/person_schema.json new file mode 100644 index 0000000..fd9ad6c --- /dev/null +++ b/ena_objects/json_schemas/person_schema.json @@ -0,0 +1,50 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema", + "title": "ISA person 
schema", + "description": "JSON-schema representing a person in the ISA model", + "type": "object", + "properties": { + "@id": { + "type": "string", + "format": "uri" + }, + "lastName": { + "type": "string" + }, + "firstName": { + "type": "string" + }, + "midInitials": { + "type": "string" + }, + "email": { + "type": "string", + "format": "email" + }, + "phone": { + "type": "string" + }, + "fax": { + "type": "string" + }, + "address": { + "type": "string" + }, + "affiliation": { + "type": "string" + }, + "roles": { + "type": "array", + "items": { + "$dynamicRef": "ontology_annotation_schema.json#" + } + }, + "comments": { + "type": "array", + "items": { + "$dynamicRef": "comment_schema.json#" + } + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/ena_objects/json_schemas/process_parameter_value_schema.json b/ena_objects/json_schemas/process_parameter_value_schema.json new file mode 100644 index 0000000..9d5826d --- /dev/null +++ b/ena_objects/json_schemas/process_parameter_value_schema.json @@ -0,0 +1,34 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema", + "title": "ISA process parameter value schema", + "description": "JSON-schema representing a Parameter Value (associated with a Protocol REF) in the ISA model", + "type": "object", + "properties": { + "category": { + "$dynamicRef": "protocol_parameter_schema.json#" + }, + "value": { + "anyOf": [ + { + "$dynamicRef": "ontology_annotation_schema.json#" + }, + { + "type": "string" + }, + { + "type": "number" + } + ] + }, + "unit": { + "$dynamicRef": "ontology_annotation_schema.json#" + }, + "comments": { + "type": "array", + "items": { + "$dynamicRef": "comment_schema.json#" + } + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/ena_objects/json_schemas/process_schema.json b/ena_objects/json_schemas/process_schema.json new file mode 100644 index 0000000..92ea112 --- /dev/null +++ b/ena_objects/json_schemas/process_schema.json @@ -0,0 +1,87 
@@ +{ + "$schema": "http://json-schema.org/draft-04/schema", + "title": "ISA process or protocol application schema, corresponds to 'Protocol REF' columns in the study and assay files", + "description": "JSON-schema representing a protocol application in the ISA model", + "type": "object", + "properties": { + "@id": { + "type": "string", + "format": "uri" + }, + "name": { + "type": "string" + }, + "executesProtocol": { + "$dynamicRef": "protocol_schema.json#" + }, + "parameterValues": { + "type": "array", + "items": { + "$dynamicRef": "process_parameter_value_schema.json#" + } + }, + "performer": { + "type": "string" + }, + "date": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "string", + "maxLength": 0 + } + ] + }, + "previousProcess": { + "$dynamicRef": "process_schema.json#" + }, + "nextProcess": { + "$dynamicRef": "process_schema.json#" + }, + "inputs": { + "type": "array", + "items": { + "anyOf": [ + { + "$dynamicRef": "source_schema.json#" + }, + { + "$dynamicRef": "sample_schema.json#" + }, + { + "$dynamicRef": "data_schema.json#" + }, + { + "$dynamicRef": "material_schema.json#" + } + ] + } + }, + "outputs": { + "type": "array", + "items": { + "anyOf": [ + { + "$dynamicRef": "sample_schema.json#" + }, + { + "$dynamicRef": "data_schema.json#" + }, + { + "$dynamicRef": "material_schema.json#" + } + ] + } + }, + "comments": { + "type": "array", + "items": { + "$dynamicRef": "comment_schema.json#" + } + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/ena_objects/json_schemas/protocol_parameter_schema.json b/ena_objects/json_schemas/protocol_parameter_schema.json new file mode 100644 index 0000000..30fd5d9 --- /dev/null +++ b/ena_objects/json_schemas/protocol_parameter_schema.json @@ -0,0 +1,22 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema", + "title": "ISA protocol parameter schema", + "description": "JSON-schema representing a parameter for a protocol (category declared in the 
investigation file) in the ISA model", + "type": "object", + "properties": { + "@id": { + "type": "string", + "format": "uri" + }, + "parameterName": { + "$dynamicRef": "ontology_annotation_schema.json#" + }, + "comments": { + "type": "array", + "items": { + "$dynamicRef": "comment_schema.json#" + } + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/ena_objects/json_schemas/protocol_schema.json b/ena_objects/json_schemas/protocol_schema.json new file mode 100644 index 0000000..f859100 --- /dev/null +++ b/ena_objects/json_schemas/protocol_schema.json @@ -0,0 +1,62 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema", + "title": "ISA protocol schema", + "name": "ISA protocol schema", + "description": "JSON-schema representing a protocol in the ISA model", + "type": "object", + "properties": { + "@id": { + "type": "string", + "format": "uri" + }, + "name": { + "type": "string" + }, + "protocolType": { + "$dynamicRef": "ontology_annotation_schema.json#" + }, + "description": { + "type": "string" + }, + "uri": { + "type": "string", + "format": "uri" + }, + "version": { + "type": "string" + }, + "parameters": { + "type": "array", + "items": { + "$dynamicRef": "protocol_parameter_schema.json#" + } + }, + "components": { + "type": "array", + "items": { + "type": "object", + "properties": { + "componentName": { + "type": "string" + }, + "componentType": { + "$dynamicRef": "ontology_annotation_schema.json#" + }, + "comments": { + "type": "array", + "items": { + "$dynamicRef": "comment_schema.json#" + } + } + } + } + }, + "comments": { + "type": "array", + "items": { + "$dynamicRef": "comment_schema.json#" + } + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/ena_objects/json_schemas/publication_schema.json b/ena_objects/json_schemas/publication_schema.json new file mode 100644 index 0000000..adca8ff --- /dev/null +++ b/ena_objects/json_schemas/publication_schema.json @@ -0,0 +1,31 @@ +{ + "$schema": 
"http://json-schema.org/draft-04/schema", + "title": "ISA publication schema", + "name": "ISA publication schema", + "description": "JSON-schema representing a publication in the ISA model", + "type": "object", + "properties": { + "pubMedID": { + "type": "string" + }, + "doi": { + "type": "string" + }, + "authorList": { + "type": "string" + }, + "title": { + "type": "string" + }, + "status": { + "$dynamicRef": "ontology_annotation_schema.json#" + }, + "comments": { + "type": "array", + "items": { + "$dynamicRef": "comment_schema.json#" + } + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/ena_objects/json_schemas/sample_schema.json b/ena_objects/json_schemas/sample_schema.json new file mode 100644 index 0000000..2ea8e26 --- /dev/null +++ b/ena_objects/json_schemas/sample_schema.json @@ -0,0 +1,40 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema", + "title": "ISA sample schema", + "description": "JSON-schema representing a sample in the ISA model. 
A sample represents a major output resulting from a protocol application other than the special case outputs of Extract or a Labeled Extract.", + "type": "object", + "properties": { + "@id": { + "type": "string", + "format": "uri" + }, + "name": { + "type": "string" + }, + "characteristics": { + "type": "array", + "items": { + "$dynamicRef": "material_attribute_value_schema.json#" + } + }, + "factorValues": { + "type": "array", + "items": { + "$dynamicRef": "factor_value_schema.json#" + } + }, + "derivesFrom": { + "type": "array", + "items": { + "$dynamicRef": "source_schema.json#" + } + }, + "comments": { + "type": "array", + "items": { + "$dynamicRef": "comment_schema.json#" + } + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/ena_objects/json_schemas/source_schema.json b/ena_objects/json_schemas/source_schema.json new file mode 100644 index 0000000..631a930 --- /dev/null +++ b/ena_objects/json_schemas/source_schema.json @@ -0,0 +1,27 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema", + "title": "ISA source schema", + "description": "JSON-schema representing a source in the ISA model. 
Sources are considered as the starting biological material used in a study.", + "properties": { + "@id": { + "type": "string", + "format": "uri" + }, + "name": { + "type": "string" + }, + "characteristics": { + "type": "array", + "items": { + "$dynamicRef": "material_attribute_value_schema.json#" + } + }, + "comments": { + "type": "array", + "items": { + "$dynamicRef": "comment_schema.json#" + } + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/ena_objects/json_schemas/study_schema.json b/ena_objects/json_schemas/study_schema.json new file mode 100644 index 0000000..95587d1 --- /dev/null +++ b/ena_objects/json_schemas/study_schema.json @@ -0,0 +1,141 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "title": "Study JSON Schema", + "description": "JSON Schema describing a Study", + "@context": { + "@base": "http://purl.org/isaterms/", + "xsd": "http://www.w3.org/2001/XMLSchema#" + }, + "type": "object", + "properties": { + "@id": { + "type": "string", + "format": "uri" + }, + "filename": { + "type": "string" + }, + "identifier": { + "type": "string" + }, + "title": { + "type": "string" + }, + "description": { + "type": "string" + }, + "submissionDate": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "string", + "maxLength": 0 + } + ] + }, + "publicReleaseDate": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "string", + "maxLength": 0 + } + ] + }, + "publications": { + "type": "array", + "items": { + "$dynamicRef": "publication_schema.json#" + } + }, + "people": { + "type": "array", + "items": { + "$dynamicRef": "person_schema.json#" + } + }, + "studyDesignDescriptors": { + "type": "array", + "items": { + "$dynamicRef": "ontology_annotation_schema.json#" + } + }, + "protocols": { + "type": "array", + "items": { + "$dynamicRef": "protocol_schema.json#" + } + }, + "materials": { + "type": "object", + "properties": { + "sources": { + "type": "array", + 
"items": { + "$dynamicRef": "source_schema.json#" + } + }, + "samples": { + "type": "array", + "items": { + "$dynamicRef": "sample_schema.json#" + } + }, + "otherMaterials": { + "type": "array", + "items": { + "$dynamicRef": "material_schema.json#" + } + } + } + }, + "processSequence": { + "type": "array", + "items": { + "$dynamicRef": "process_schema.json#" + } + }, + "assays": { + "type": "array", + "items": { + "$dynamicRef": "assay_schema.json#" + } + }, + "factors": { + "type": "array", + "items": { + "$dynamicRef": "factor_schema.json#" + } + }, + "characteristicCategories": { + "description": "List of all the characteristics categories (or material attributes) defined in the study, used to avoid duplication of their declaration when each material_attribute_value is created. ", + "type": "array", + "items": { + "$dynamicRef": "material_attribute_schema.json#" + } + }, + "unitCategories": { + "description": "List of all the units defined in the study, used to avoid duplication of their declaration when each value is created. ", + "type": "array", + "items": { + "$dynamicRef": "ontology_annotation_schema.json#" + } + }, + "comments": { + "type": "array", + "items": { + "$dynamicRef": "comment_schema.json#" + } + } + }, + "additionalProperties": false, + "required": [ + "assays" + ] +} \ No newline at end of file diff --git a/ena_objects/other_material.py b/ena_objects/other_material.py index c43acef..a962106 100644 --- a/ena_objects/other_material.py +++ b/ena_objects/other_material.py @@ -1,4 +1,3 @@ -from ena_objects.ena_std_lib import validate_dict from ena_objects.characteristic import ( IsaBase, ParameterValue, @@ -13,8 +12,6 @@ class OtherMaterial(IsaBase): This Class represents an 'other_material' in the ISA JSON and extends the ISA Base class. 
""" - mandatory_keys = ["id", "name", "type", "other_material_characteristics"] - def __init__( self, id: int, diff --git a/requirements.txt b/requirements.txt index 8958e29..39a9173 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ pandas>=2.0.3 , <= 3.0.0 pyyaml==5.* requests>=2.31.0 , <= 3.0.0 openpyxl>=3.1.2 , <= 4.0.0 +jsonschema>=4.19.1 diff --git a/tests/test_data/bad_assay_stream_isa_json.json b/tests/test_data/bad_assay_stream_isa_json.json new file mode 100644 index 0000000..ca2c3fe --- /dev/null +++ b/tests/test_data/bad_assay_stream_isa_json.json @@ -0,0 +1,5067 @@ +{ + "identifier": "", + "title": "Test Investigation original", + "description": "This investigation uses the Standard ENA templates.", + "submissionDate": "", + "publicReleaseDate": "", + "ontologySourceReferences": [], + "filename": "Test Investigation original.txt", + "comments": [ + { + "name": "ISAjson export time", + "value": "2023-10-13T06:24:54Z" + }, + { + "name": "SEEK Project name", + "value": "Test Project ENA upload Tool" + }, + { + "name": "SEEK Project ID", + "value": "http://localhost:3000/single_pages/16" + }, + { + "name": "SEEK Investigation ID", + "value": "19" + } + ], + "publications": [], + "people": [ + { + "@id": "#people/4", + "lastName": "De Pelseneer", + "firstName": "Kevin", + "midInitials": "", + "email": "kevin.depelseneer@psb.ugent.be", + "phone": "", + "fax": "", + "address": "", + "affiliation": "", + "roles": [ + { + "termAccession": "", + "termSource": "", + "annotationValue": "" + } + ], + "comments": [ + { + "@id": "", + "value": "", + "name": "" + } + ] + } + ], + "studies": [ + { + "identifier": "", + "title": "ENA Study 1", + "description": "This is the first ENA Study.", + "submissionDate": "", + "publicReleaseDate": "", + "filename": "ENA Study 1.txt", + "comments": [ + { + "@id": "#study_comment/19_8_49", + "name": "ena_sample_alias_prefix", + "value": "https://datahub.elixir-belgium.org/samples/" + }, + { + "@id": 
"#study_comment/19_255b7d70-4bbf-013c-6732-7a163e608de1", + "name": "SEEK Study ID", + "value": "19" + }, + { + "@id": "#study_comment/19_255b8560-4bbf-013c-6733-7a163e608de1", + "name": "SEEK creation date", + "value": "2023-10-09T13:39:46Z" + } + ], + "publications": [ + { + "pubMedID": 5, + "doi": "", + "status": { + "termAccession": "", + "termSource": "", + "annotationValue": "" + }, + "title": "Test publication 1", + "author_list": "", + "comments": [ + { + "@id": "#publication_comment/1_27", + "name": "linked_assays", + "value": "[{\"id\":27,\"title\":\"ENA Library construction - Study 1 - Stream 1\"}]" + }, + { + "@id": "#publication_comment/1_7", + "name": "linked_studies", + "value": "[{\"id\":7,\"title\":\"Test Study in Single Page\"}]" + } + ] + }, + { + "pubMedID": 6, + "doi": "", + "status": { + "termAccession": "", + "termSource": "", + "annotationValue": "" + }, + "title": "Test publication 2", + "author_list": "", + "comments": [ + { + "@id": "#publication_comment/2_29", + "name": "linked_assays", + "value": "[{\"id\":29,\"title\":\"ENA Library construction - Study 1 - Stream 2\"}]" + }, + { + "@id": "#publication_comment/2_", + "name": "linked_studies", + "value": "[]" + } + ] + } + ], + "people": [ + { + "@id": "#people/4", + "lastName": "De Pelseneer", + "firstName": "Kevin", + "midInitials": "", + "email": "kevin.depelseneer@psb.ugent.be", + "phone": "", + "fax": "", + "address": "", + "affiliation": "", + "roles": [ + { + "termAccession": "", + "termSource": "", + "annotationValue": "" + } + ], + "comments": [ + { + "@id": "", + "value": "", + "name": "" + } + ] + } + ], + "studyDesignDescriptors": [], + "characteristicCategories": [ + { + "@id": "#characteristic_category/geographic_location_(country_and/or_sea)_410", + "characteristicType": { + "annotationValue": "geographic location (country and/or sea)", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/taxon_id_411", + "characteristicType": { + 
"annotationValue": "taxon_id", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/title_415", + "characteristicType": { + "annotationValue": "title", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/sample_description_416", + "characteristicType": { + "annotationValue": "sample_description", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/collection_date_417", + "characteristicType": { + "annotationValue": "collection date", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/accession_418", + "characteristicType": { + "annotationValue": "accession", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/submission_date_419", + "characteristicType": { + "annotationValue": "submission date", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/status_420", + "characteristicType": { + "annotationValue": "status", + "termAccession": "", + "termSource": "" + } + } + ], + "materials": { + "sources": [ + { + "@id": "#source/140", + "name": "Source 1", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/geographic_location_(country_and/or_sea)_410" + }, + "value": { + "annotationValue": "Afghanistan", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/taxon_id_411" + }, + "value": { + "annotationValue": "1234", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#source/141", + "name": "Source 2", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/geographic_location_(country_and/or_sea)_410" + }, + "value": { + "annotationValue": "Albania", + "termSource": "", + 
"termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/taxon_id_411" + }, + "value": { + "annotationValue": "2345", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + } + ], + "samples": [ + { + "@id": "#sample/142", + "name": "Sample 1", + "derivesFrom": [ + { + "@id": "#source/140" + } + ], + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_415" + }, + "value": { + "annotationValue": "Sample title 1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/sample_description_416" + }, + "value": { + "annotationValue": "Sample description 1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/collection_date_417" + }, + "value": { + "annotationValue": "2023", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_418" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_419" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_420" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + 
} + ], + "factorValues": [ + { + "category": { + "@id": "" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/143", + "name": "Sample 2", + "derivesFrom": [ + { + "@id": "#source/140" + } + ], + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_415" + }, + "value": { + "annotationValue": "Sample title 2", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/sample_description_416" + }, + "value": { + "annotationValue": "Sample description 2", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/collection_date_417" + }, + "value": { + "annotationValue": "2022", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_418" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_419" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_420" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "factorValues": [ + { + "category": { + "@id": "" + }, + "value": { + "annotationValue": "", + "termSource": "", + 
"termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/144", + "name": "Sample 3", + "derivesFrom": [ + { + "@id": "#source/141" + } + ], + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_415" + }, + "value": { + "annotationValue": "Sample title 3", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/sample_description_416" + }, + "value": { + "annotationValue": "Sample description 3", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/collection_date_417" + }, + "value": { + "annotationValue": "2021", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_418" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_419" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_420" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "factorValues": [ + { + "category": { + "@id": "" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": 
"#sample/145", + "name": "Sample 4", + "derivesFrom": [ + { + "@id": "#source/141" + } + ], + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_415" + }, + "value": { + "annotationValue": "Sample title 4", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/sample_description_416" + }, + "value": { + "annotationValue": "Sample description 4", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/collection_date_417" + }, + "value": { + "annotationValue": "2020", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_418" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_419" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_420" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "factorValues": [ + { + "category": { + "@id": "" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/196", + "name": "Sample 7", + "derivesFrom": [ + { + "@id": "#source/141" + } + ], + "characteristics": [ + { + "category": { + 
"@id": "#characteristic_category/title_415" + }, + "value": { + "annotationValue": "Sample title 7", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/sample_description_416" + }, + "value": { + "annotationValue": "This sample is not used in a ENA assay stream", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/collection_date_417" + }, + "value": { + "annotationValue": "2016", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_418" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_419" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_420" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "factorValues": [ + { + "category": { + "@id": "" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + } + ] + }, + "protocols": [ + { + "@id": "#protocol/3_19", + "name": "sample collection", + "protocolType": { + "annotationValue": "sample collection", + "termAccession": "", + "termSource": "" + }, + "description": "This SOP describes a recipe how to make chocolate 
chip cookies.", + "uri": "", + "version": "", + "parameters": [], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + }, + { + "@id": "#protocol/_27", + "name": "library construction", + "protocolType": { + "annotationValue": "library construction", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [ + { + "@id": "#parameter/423", + "parameterName": { + "annotationValue": "library_construction_protocol", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/425", + "parameterName": { + "annotationValue": "design_description", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/427", + "parameterName": { + "annotationValue": "library_source", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/428", + "parameterName": { + "annotationValue": "library_strategy", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/429", + "parameterName": { + "annotationValue": "library_selection", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/430", + "parameterName": { + "annotationValue": "library_layout", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/431", + "parameterName": { + "annotationValue": "insert_size", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/432", + "parameterName": { + "annotationValue": "platform", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/433", + "parameterName": { + "annotationValue": "instrument_model", + "termAccession": "", + "termSource": "" + } + } + ], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + }, + { + "@id": "#protocol/_28", + "name": "nucleic acid sequencing", + "protocolType": { + 
"annotationValue": "nucleic acid sequencing", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + }, + { + "@id": "#protocol/_29", + "name": "library construction", + "protocolType": { + "annotationValue": "library construction", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [ + { + "@id": "#parameter/448", + "parameterName": { + "annotationValue": "library_construction_protocol", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/450", + "parameterName": { + "annotationValue": "design_description", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/452", + "parameterName": { + "annotationValue": "library_source", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/453", + "parameterName": { + "annotationValue": "library_strategy", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/454", + "parameterName": { + "annotationValue": "library_selection", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/455", + "parameterName": { + "annotationValue": "library_layout", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/456", + "parameterName": { + "annotationValue": "insert_size", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/457", + "parameterName": { + "annotationValue": "platform", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/458", + "parameterName": { + "annotationValue": "instrument_model", + "termAccession": "", + "termSource": "" + } + } + ], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } 
+ ] + }, + { + "@id": "#protocol/_30", + "name": "nucleic acid sequencing", + "protocolType": { + "annotationValue": "nucleic acid sequencing", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + } + ], + "processSequence": [ + { + "@id": "#process/sample_collection/142", + "name": "", + "executesProtocol": { + "@id": "#protocol/3_19" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [ + { + "@id": "#source/140" + } + ], + "outputs": [ + { + "@id": "#sample/142" + } + ] + }, + { + "@id": "#process/sample_collection/143", + "name": "", + "executesProtocol": { + "@id": "#protocol/3_19" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [ + { + "@id": "#source/140" + } + ], + "outputs": [ + { + "@id": "#sample/143" + } + ] + }, + { + "@id": "#process/sample_collection/144", + "name": "", + "executesProtocol": { + "@id": "#protocol/3_19" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [ + { + "@id": "#source/141" + } + ], + "outputs": [ + { + "@id": "#sample/144" + } + ] + }, + { + "@id": "#process/sample_collection/145", + "name": "", + "executesProtocol": { + "@id": "#protocol/3_19" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [ + { + "@id": "#source/141" + } + ], + "outputs": [ + { + "@id": "#sample/145" + } + ] + }, + { + "@id": "#process/sample_collection/196", + "name": "", + "executesProtocol": { + "@id": "#protocol/3_19" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [ + { + "@id": "#source/141" 
+ } + ], + "outputs": [ + { + "@id": "#sample/196" + } + ] + } + ], + "assays": [ + { + "@id": "#assay/27_28", + "filename": "a_ena_stream_1.txt", + "measurementType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "technologyType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "comments": [ + { + "@id": "#assay_comment/19_27_28_9_50", + "name": "ena_study_title", + "value": "Ena Study 1" + }, + { + "@id": "#assay_comment/19_27_28_9_51", + "name": "study_type", + "value": "Whole Genome Sequencing" + }, + { + "@id": "#assay_comment/19_27_28_9_52", + "name": "new_study_type", + "value": "This should not be submitted!" + }, + { + "@id": "#assay_comment/19_27_28_9_53", + "name": "ena_study_abstract", + "value": "This is Ena Study 1." + }, + { + "@id": "#assay_comment/19_27_28_9_54", + "name": "assay_stream", + "value": "Ena stream 1" + }, + { + "@id": "#assay_comment/19_27_28_9_55", + "name": "ena_study_alias_prefix", + "value": "https://datahub.elixir-belgium.org/studies/" + }, + { + "@id": "#assay_comment/19_27_28_9_56", + "name": "ena_experiment_alias_prefix", + "value": "https://datahub.elixir-belgium.org/samples/" + }, + { + "@id": "#assay_comment/19_27_28_9_57", + "name": "ena_run_alias_prefix", + "value": "https://datahub.elixir-belgium.org/samples/" + }, + { + "@id": "#assay_comment/27_28", + "name": "linked_assays", + "value": "[{\"id\":27,\"title\":\"ENA Library construction - Study 1 - Stream 1\"},{\"id\":28,\"title\":\"ENA DNA Sequencing - Study 1 - Stream 1\"}]" + } + ], + "technologyPlatform": "", + "characteristicCategories": [ + { + "@id": "#characteristic_category/title_424", + "characteristicType": { + "annotationValue": "title", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/accession_434", + "characteristicType": { + "annotationValue": "accession", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": 
"#characteristic_category/submission_date_435", + "characteristicType": { + "annotationValue": "submission date", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/status_436", + "characteristicType": { + "annotationValue": "status", + "termAccession": "", + "termSource": "" + } + } + ], + "materials": { + "samples": [], + "otherMaterials": [ + { + "@id": "#other_material/146", + "name": "Library 1", + "type": "library_name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_424" + }, + "value": { + "annotationValue": "Library title 1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_434" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_435" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_436" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/142" + } + ] + }, + { + "@id": "#other_material/147", + "name": "Library 2", + "type": "library_name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_424" + }, + "value": { + "annotationValue": "Library title 2", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_434" + }, + "value": { + 
"annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_435" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_436" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/143" + } + ] + }, + { + "@id": "#other_material/148", + "name": "Library 3", + "type": "library_name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_424" + }, + "value": { + "annotationValue": "Library title 3", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_434" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_435" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_436" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/144" + } + ] + }, + { + "@id": "#other_material/149", + "name": "Library 4", + "type": "library_name", + "characteristics": [ + { + "category": { + "@id": 
"#characteristic_category/title_424" + }, + "value": { + "annotationValue": "Library title 4", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_434" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_435" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_436" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/145" + } + ] + } + ] + }, + "processSequence": [ + { + "@id": "#process/library_construction/146", + "name": "", + "executesProtocol": { + "@id": "#protocol/_27" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/423" + }, + "value": { + "annotationValue": "My special protocol 1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/425" + }, + "value": { + "annotationValue": "Library description 1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/427" + }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/428" + }, + "value": { + "annotationValue": "WGS", + "termSource": "", + 
"termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/429" + }, + "value": { + "annotationValue": "RANDOM", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/430" + }, + "value": { + "annotationValue": "SINGLE", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/431" + }, + "value": { + "annotationValue": "123", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/432" + }, + "value": { + "annotationValue": "LS454", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/433" + }, + "value": { + "annotationValue": "454 GS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/sample_collection/146" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/146" + }, + "inputs": [ + { + "@id": "#sample/142" + } + ], + "outputs": [ + { + "@id": "#sample/146" + } + ] + }, + { + "@id": "#process/library_construction/147", + "name": "", + "executesProtocol": { + "@id": "#protocol/_27" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/423" + }, + "value": { + "annotationValue": "My special protocol 2", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/425" + }, + "value": { + "annotationValue": "Library description 2", + 
"termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/427" + }, + "value": { + "annotationValue": "GENOMIC SINGLE CELL", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/428" + }, + "value": { + "annotationValue": "WGA", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/429" + }, + "value": { + "annotationValue": "PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/430" + }, + "value": { + "annotationValue": "PAIRED", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/431" + }, + "value": { + "annotationValue": "234", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/432" + }, + "value": { + "annotationValue": "Illumina", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/433" + }, + "value": { + "annotationValue": "Illumina Genome Analyzer", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/sample_collection/147" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/147" + }, + "inputs": [ + { + "@id": "#sample/143" + } + ], + "outputs": [ + { + "@id": "#sample/147" + } + ] + }, + { 
+ "@id": "#process/library_construction/148", + "name": "", + "executesProtocol": { + "@id": "#protocol/_27" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/423" + }, + "value": { + "annotationValue": "My special protocol 3", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/425" + }, + "value": { + "annotationValue": "Library description 3", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/427" + }, + "value": { + "annotationValue": "TRANSCRIPTOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/428" + }, + "value": { + "annotationValue": "WXS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/429" + }, + "value": { + "annotationValue": "RANDOM PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/430" + }, + "value": { + "annotationValue": "SINGLE", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/431" + }, + "value": { + "annotationValue": "345", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/432" + }, + "value": { + "annotationValue": "PacBio", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": 
"#parameter/433" + }, + "value": { + "annotationValue": "PacBio RS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/sample_collection/148" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/148" + }, + "inputs": [ + { + "@id": "#sample/144" + } + ], + "outputs": [ + { + "@id": "#sample/148" + } + ] + }, + { + "@id": "#process/library_construction/149", + "name": "", + "executesProtocol": { + "@id": "#protocol/_27" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/423" + }, + "value": { + "annotationValue": "My special protocol 4", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/425" + }, + "value": { + "annotationValue": "Library description 4", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/427" + }, + "value": { + "annotationValue": "TRANSCRIPTOMIC SINGLE CELL", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/428" + }, + "value": { + "annotationValue": "RNA-Seq", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/429" + }, + "value": { + "annotationValue": "RT-PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/430" + }, + "value": { + "annotationValue": "PAIRED", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": 
[] + } + }, + { + "category": { + "@id": "#parameter/431" + }, + "value": { + "annotationValue": "456", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/432" + }, + "value": { + "annotationValue": "Themo Fisher Scientific", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/433" + }, + "value": { + "annotationValue": "AB 3730xL Genetic Analyzer", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/sample_collection/149" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/149" + }, + "inputs": [ + { + "@id": "#sample/145" + } + ], + "outputs": [ + { + "@id": "#sample/149" + } + ] + }, + { + "@id": "#process/nucleic_acid_sequencing/150", + "name": "", + "executesProtocol": { + "@id": "#protocol/_28" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/library_construction/150" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/146" + } + ], + "outputs": [ + { + "@id": "#sample/150" + } + ] + }, + { + "@id": "#process/nucleic_acid_sequencing/151", + "name": "", + "executesProtocol": { + "@id": "#protocol/_28" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/library_construction/151" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/147" + } + ], + "outputs": [ + { + "@id": "#sample/151" + } + ] + }, + { + "@id": "#process/nucleic_acid_sequencing/152", + "name": "", + "executesProtocol": { + "@id": "#protocol/_28" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": { + "@id": 
"#process/library_construction/152" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/148" + } + ], + "outputs": [ + { + "@id": "#sample/152" + } + ] + }, + { + "@id": "#process/nucleic_acid_sequencing/153", + "name": "", + "executesProtocol": { + "@id": "#protocol/_28" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/library_construction/153" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/149" + } + ], + "outputs": [ + { + "@id": "#sample/153" + } + ] + } + ], + "dataFiles": [ + { + "@id": "#data_file/150", + "name": "Raw Data File 1", + "type": "Raw Data File", + "comments": [ + { + "name": "file_name", + "value": "data_file_1.bam" + }, + { + "name": "file_type", + "value": "bam" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "accession", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + } + ] + }, + { + "@id": "#data_file/151", + "name": "Raw Data File 2", + "type": "Raw Data File", + "comments": [ + { + "name": "file_name", + "value": "data_file_2.cram" + }, + { + "name": "file_type", + "value": "cram" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "accession", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + } + ] + }, + { + "@id": "#data_file/152", + "name": "Raw Data File 3", + "type": "Raw Data File", + "comments": [ + { + "name": "file_name", + "value": "data_file_3.fastq" + }, + { + "name": "file_type", + "value": "fastq" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "accession", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + } + ] + }, + { + "@id": "#data_file/153", + "name": "Raw Data File 4", + "type": "Raw Data File", + "comments": [ + { + "name": "file_name", + "value": "data_file_4.sff" + }, + { + 
"name": "file_type", + "value": "sff" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "accession", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + } + ] + } + ], + "unitCategories": [] + }, + { + "@id": "#assay/29_30", + "filename": "a_ena_stream_2.txt", + "measurementType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "technologyType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "comments": [ + { + "@id": "#assay_comment/19_29_30_10_50", + "name": "ena_study_title", + "value": "Ena Study 2" + }, + { + "@id": "#assay_comment/19_29_30_10_51", + "name": "study_type", + "value": "Other" + }, + { + "@id": "#assay_comment/19_29_30_10_52", + "name": "new_study_type", + "value": "My special study type" + }, + { + "@id": "#assay_comment/19_29_30_10_53", + "name": "ena_study_abstract", + "value": "This is Ena Study 2." + }, + { + "@id": "#assay_comment/19_29_30_10_54", + "name": "assay_stream", + "value": "Ena stream 2" + }, + { + "@id": "#assay_comment/19_29_30_10_55", + "name": "ena_study_alias_prefix", + "value": "https://datahub.elixir-belgium.org/studies/" + }, + { + "@id": "#assay_comment/19_29_30_10_56", + "name": "ena_experiment_alias_prefix", + "value": "https://datahub.elixir-belgium.org/samples/" + }, + { + "@id": "#assay_comment/19_29_30_10_57", + "name": "ena_run_alias_prefix", + "value": "https://datahub.elixir-belgium.org/samples/" + }, + { + "@id": "#assay_comment/29_30", + "name": "linked_assays", + "value": "[{\"id\":29,\"title\":\"ENA Library construction - Study 1 - Stream 2\"},{\"id\":30,\"title\":\"ENA DNA Sequencing - Study 1 - Stream 2\"}]" + } + ], + "technologyPlatform": "", + "characteristicCategories": [ + { + "@id": "#characteristic_category/title_449", + "characteristicType": { + "annotationValue": "title", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": 
"#characteristic_category/accession_459", + "characteristicType": { + "annotationValue": "accession", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/submission_date_460", + "characteristicType": { + "annotationValue": "submission date", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/status_461", + "characteristicType": { + "annotationValue": "status", + "termAccession": "", + "termSource": "" + } + } + ], + "materials": { + "samples": [], + "otherMaterials": [ + { + "@id": "#other_material/154", + "name": "Library 5", + "type": "library_name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_449" + }, + "value": { + "annotationValue": "Library title 5", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_459" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_460" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_461" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/142" + } + ] + }, + { + "@id": "#other_material/155", + "name": "Library 6", + "type": "library_name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_449" + }, + "value": { + "annotationValue": "Library title 6", + "termSource": "", + "termAccession": "" + }, + "unit": { + 
"termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_459" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_460" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_461" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/143" + } + ] + }, + { + "@id": "#other_material/156", + "name": "Library 7", + "type": "library_name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_449" + }, + "value": { + "annotationValue": "Library title 7", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_459" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_460" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_461" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/144" + } + ] 
+ }, + { + "@id": "#other_material/157", + "name": "Library 8", + "type": "library_name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_449" + }, + "value": { + "annotationValue": "Library title 8", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_459" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_460" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_461" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/145" + } + ] + } + ] + }, + "processSequence": [ + { + "@id": "#process/library_construction/154", + "name": "", + "executesProtocol": { + "@id": "#protocol/_29" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/448" + }, + "value": { + "annotationValue": "My library construction protocol 5", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/450" + }, + "value": { + "annotationValue": "Library design description 5", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/452" + }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + 
"termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/453" + }, + "value": { + "annotationValue": "ssRNA-seq", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/454" + }, + "value": { + "annotationValue": "HMPR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/455" + }, + "value": { + "annotationValue": "SINGLE", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/456" + }, + "value": { + "annotationValue": "123", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/457" + }, + "value": { + "annotationValue": "LS454", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/458" + }, + "value": { + "annotationValue": "454 GS 20", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/sample_collection/154" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/154" + }, + "inputs": [ + { + "@id": "#sample/142" + } + ], + "outputs": [ + { + "@id": "#sample/154" + } + ] + }, + { + "@id": "#process/library_construction/155", + "name": "", + "executesProtocol": { + "@id": "#protocol/_29" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/448" + }, + "value": { + "annotationValue": "My library construction protocol 6", + "termSource": "", + "termAccession": "" + }, + "unit": { + 
"termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/450" + }, + "value": { + "annotationValue": "Library design description 6", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/452" + }, + "value": { + "annotationValue": "GENOMIC SINGLE CELL", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/453" + }, + "value": { + "annotationValue": "miRNA-Seq", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/454" + }, + "value": { + "annotationValue": "MF", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/455" + }, + "value": { + "annotationValue": "PAIRED", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/456" + }, + "value": { + "annotationValue": "234", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/457" + }, + "value": { + "annotationValue": "Illumina", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/458" + }, + "value": { + "annotationValue": "Illumina Genome Analyzer II", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": 
"#process/sample_collection/155" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/155" + }, + "inputs": [ + { + "@id": "#sample/143" + } + ], + "outputs": [ + { + "@id": "#sample/155" + } + ] + }, + { + "@id": "#process/library_construction/156", + "name": "", + "executesProtocol": { + "@id": "#protocol/_29" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/448" + }, + "value": { + "annotationValue": "My library construction protocol 7", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/450" + }, + "value": { + "annotationValue": "Library design description 7", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/452" + }, + "value": { + "annotationValue": "TRANSCRIPTOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/453" + }, + "value": { + "annotationValue": "ncRNA-Seq", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/454" + }, + "value": { + "annotationValue": "size fractionation", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/455" + }, + "value": { + "annotationValue": "SINGLE", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/456" + }, + "value": { + "annotationValue": "345", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + 
"category": { + "@id": "#parameter/457" + }, + "value": { + "annotationValue": "PacBio", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/458" + }, + "value": { + "annotationValue": "PacBio RS II", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/sample_collection/156" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/156" + }, + "inputs": [ + { + "@id": "#sample/144" + } + ], + "outputs": [ + { + "@id": "#sample/156" + } + ] + }, + { + "@id": "#process/library_construction/157", + "name": "", + "executesProtocol": { + "@id": "#protocol/_29" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/448" + }, + "value": { + "annotationValue": "My library construction protocol 8", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/450" + }, + "value": { + "annotationValue": "Library design description 8", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/452" + }, + "value": { + "annotationValue": "SYNTHETIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/453" + }, + "value": { + "annotationValue": "FL-cDNA", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/454" + }, + "value": { + "annotationValue": "repeat fractionation", + "termSource": "", + "termAccession": "" + }, + "unit": { + 
"termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/455" + }, + "value": { + "annotationValue": "PAIRED", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/456" + }, + "value": { + "annotationValue": "456", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/457" + }, + "value": { + "annotationValue": "Themo Fisher Scientific", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/458" + }, + "value": { + "annotationValue": "AB 3730 Genetic Analyzer", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/sample_collection/157" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/157" + }, + "inputs": [ + { + "@id": "#sample/145" + } + ], + "outputs": [ + { + "@id": "#sample/157" + } + ] + }, + { + "@id": "#process/nucleic_acid_sequencing/158", + "name": "", + "executesProtocol": { + "@id": "#protocol/_30" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/library_construction/158" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/154" + } + ], + "outputs": [ + { + "@id": "#sample/158" + } + ] + }, + { + "@id": "#process/nucleic_acid_sequencing/159", + "name": "", + "executesProtocol": { + "@id": "#protocol/_30" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/library_construction/159" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/155" + } + ], + 
"outputs": [ + { + "@id": "#sample/159" + } + ] + }, + { + "@id": "#process/nucleic_acid_sequencing/160", + "name": "", + "executesProtocol": { + "@id": "#protocol/_30" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/library_construction/160" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/156" + } + ], + "outputs": [ + { + "@id": "#sample/160" + } + ] + }, + { + "@id": "#process/nucleic_acid_sequencing/161", + "name": "", + "executesProtocol": { + "@id": "#protocol/_30" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/library_construction/161" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/157" + } + ], + "outputs": [ + { + "@id": "#sample/161" + } + ] + } + ], + "dataFiles": [ + { + "@id": "#data_file/158", + "name": "Raw Data File 5", + "type": "Raw Data File", + "comments": [ + { + "name": "file_name", + "value": "data file 5.bam" + }, + { + "name": "file_type", + "value": "bam" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "accession", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + } + ] + }, + { + "@id": "#data_file/159", + "name": "Raw Data File 6", + "type": "Raw Data File", + "comments": [ + { + "name": "file_name", + "value": "data file 6.cram" + }, + { + "name": "file_type", + "value": "cram" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "accession", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + } + ] + }, + { + "@id": "#data_file/160", + "name": "Raw Data File 7", + "type": "Raw Data File", + "comments": [ + { + "name": "file_name", + "value": "data file 7.fastq" + }, + { + "name": "file_type", + "value": "fastq" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "accession", + "value": "" + }, + { + "name": 
"submission date", + "value": "" + }, + { + "name": "status", + "value": "" + } + ] + }, + { + "@id": "#data_file/161", + "name": "Raw Data File 8", + "type": "Raw Data File", + "comments": [ + { + "name": "file_name", + "value": "data file 8.sff" + }, + { + "name": "file_type", + "value": "sff" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "accession", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + } + ] + } + ], + "unitCategories": [] + } + ], + "factors": [], + "unitCategories": [] + }, + { + "identifier": "", + "title": "ENA Study 2", + "description": "ENA Study 2 - description", + "submissionDate": "", + "publicReleaseDate": "", + "filename": "ENA Study 2.txt", + "comments": [ + { + "@id": "#study_comment/20_11_49", + "name": "ena_sample_alias_prefix", + "value": "https://datahub.elixir-belgium.org/samples/" + }, + { + "@id": "#study_comment/20_259d4380-4bbf-013c-6734-7a163e608de1", + "name": "SEEK Study ID", + "value": "20" + }, + { + "@id": "#study_comment/20_259d4a20-4bbf-013c-6735-7a163e608de1", + "name": "SEEK creation date", + "value": "2023-10-09T14:34:40Z" + } + ], + "publications": [ + { + "pubMedID": 7, + "doi": null, + "status": { + "termAccession": "", + "termSource": "", + "annotationValue": "" + }, + "title": "Test Publication 3", + "author_list": "", + "comments": [ + { + "@id": "#publication_comment/3_31", + "name": "linked_assays", + "value": "[{\"id\":31,\"title\":\"ENA Library construction - Study 2 - Stream 1\"}]" + }, + { + "@id": "#publication_comment/3_", + "name": "linked_studies", + "value": "[]" + } + ] + } + ], + "people": [ + { + "@id": "#people/4", + "lastName": "De Pelseneer", + "firstName": "Kevin", + "midInitials": "", + "email": "kevin.depelseneer@psb.ugent.be", + "phone": "", + "fax": "", + "address": "", + "affiliation": "", + "roles": [ + { + "termAccession": "", + "termSource": "", + "annotationValue": "" + } + ], + "comments": [ + { + 
"@id": "", + "value": "", + "name": "" + } + ] + } + ], + "studyDesignDescriptors": [], + "characteristicCategories": [ + { + "@id": "#characteristic_category/geographic_location_(country_and/or_sea)_472", + "characteristicType": { + "annotationValue": "geographic location (country and/or sea)", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/taxon_id_473", + "characteristicType": { + "annotationValue": "taxon_id", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/title_477", + "characteristicType": { + "annotationValue": "title", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/sample_description_478", + "characteristicType": { + "annotationValue": "sample_description", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/collection_date_479", + "characteristicType": { + "annotationValue": "collection date", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/accession_480", + "characteristicType": { + "annotationValue": "accession", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/submission_date_481", + "characteristicType": { + "annotationValue": "submission date", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/status_482", + "characteristicType": { + "annotationValue": "status", + "termAccession": "", + "termSource": "" + } + } + ], + "materials": { + "sources": [ + { + "@id": "#source/162", + "name": "Source 3", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/geographic_location_(country_and/or_sea)_472" + }, + "value": { + "annotationValue": "Afghanistan", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/taxon_id_473" + 
}, + "value": { + "annotationValue": "9876", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#source/163", + "name": "Source 4", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/geographic_location_(country_and/or_sea)_472" + }, + "value": { + "annotationValue": "Albania", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/taxon_id_473" + }, + "value": { + "annotationValue": "8765", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + } + ], + "samples": [ + { + "@id": "#sample/164", + "name": "Sample 5", + "derivesFrom": [ + { + "@id": "#source/162" + } + ], + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_477" + }, + "value": { + "annotationValue": "Sample title 5", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/sample_description_478" + }, + "value": { + "annotationValue": "Sample description 5", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/collection_date_479" + }, + "value": { + "annotationValue": "2019", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_480" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": 
"#characteristic_category/submission_date_481" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_482" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "factorValues": [ + { + "category": { + "@id": "" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/165", + "name": "Sample 6", + "derivesFrom": [ + { + "@id": "#source/163" + } + ], + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_477" + }, + "value": { + "annotationValue": "Sample title 6", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/sample_description_478" + }, + "value": { + "annotationValue": "Sample description 6", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/collection_date_479" + }, + "value": { + "annotationValue": "2018", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_480" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_481" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + 
"unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_482" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "factorValues": [ + { + "category": { + "@id": "" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + } + ] + }, + "protocols": [ + { + "@id": "#protocol/3_20", + "name": "sample collection", + "protocolType": { + "annotationValue": "sample collection", + "termAccession": "", + "termSource": "" + }, + "description": "This SOP describes a recipe how to make chocolate chip cookies.", + "uri": "", + "version": "", + "parameters": [], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + }, + { + "@id": "#protocol/_31", + "name": "library construction", + "protocolType": { + "annotationValue": "library construction", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [ + { + "@id": "#parameter/485", + "parameterName": { + "annotationValue": "library_construction_protocol", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/487", + "parameterName": { + "annotationValue": "design_description", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/489", + "parameterName": { + "annotationValue": "library_source", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/490", + "parameterName": { + "annotationValue": "library_strategy", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/491", + "parameterName": { + "annotationValue": "library_selection", + "termAccession": "", + "termSource": 
"" + } + }, + { + "@id": "#parameter/492", + "parameterName": { + "annotationValue": "library_layout", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/493", + "parameterName": { + "annotationValue": "insert_size", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/494", + "parameterName": { + "annotationValue": "platform", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/495", + "parameterName": { + "annotationValue": "instrument_model", + "termAccession": "", + "termSource": "" + } + } + ], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + }, + { + "@id": "#protocol/_32", + "name": "nucleic acid sequencing", + "protocolType": { + "annotationValue": "nucleic acid sequencing", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + } + ], + "processSequence": [ + { + "@id": "#process/sample_collection/164", + "name": "", + "executesProtocol": { + "@id": "#protocol/3_20" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [ + { + "@id": "#source/162" + } + ], + "outputs": [ + { + "@id": "#sample/164" + } + ] + }, + { + "@id": "#process/sample_collection/165", + "name": "", + "executesProtocol": { + "@id": "#protocol/3_20" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [ + { + "@id": "#source/163" + } + ], + "outputs": [ + { + "@id": "#sample/165" + } + ] + } + ], + "assays": [ + { + "@id": "#assay/31_32", + "filename": "a_ena_stream_3.txt", + "measurementType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + 
"technologyType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "comments": [ + { + "@id": "#assay_comment/20_31_32_12_50", + "name": "ena_study_title", + "value": "Ena Study 3" + }, + { + "@id": "#assay_comment/20_31_32_12_51", + "name": "study_type", + "value": "Other" + }, + { + "@id": "#assay_comment/20_31_32_12_52", + "name": "new_study_type", + "value": "My other special study type" + }, + { + "@id": "#assay_comment/20_31_32_12_53", + "name": "ena_study_abstract", + "value": "This is Ena Study 3." + }, + { + "@id": "#assay_comment/20_31_32_12_54", + "name": "assay_stream", + "value": "Ena stream 3" + }, + { + "@id": "#assay_comment/20_31_32_12_55", + "name": "ena_study_alias_prefix", + "value": "https://datahub.elixir-belgium.org/studies/" + }, + { + "@id": "#assay_comment/20_31_32_12_56", + "name": "ena_experiment_alias_prefix", + "value": "https://datahub.elixir-belgium.org/samples/" + }, + { + "@id": "#assay_comment/20_31_32_12_57", + "name": "ena_run_alias_prefix", + "value": "https://datahub.elixir-belgium.org/samples/" + }, + { + "@id": "#assay_comment/31_32", + "name": "linked_assays", + "value": "[{\"id\":31,\"title\":\"ENA Library construction - Study 2 - Stream 1\"},{\"id\":32,\"title\":\"ENA DNA Sequencing - Study 2 - Stream 1\"}]" + } + ], + "technologyPlatform": "", + "characteristicCategories": [ + { + "@id": "#characteristic_category/title_486", + "characteristicType": { + "annotationValue": "title", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/accession_496", + "characteristicType": { + "annotationValue": "accession", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/submission_date_497", + "characteristicType": { + "annotationValue": "submission date", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/status_498", + "characteristicType": { + "annotationValue": "status", + "termAccession": 
"", + "termSource": "" + } + } + ], + "materials": { + "samples": [], + "otherMaterials": [ + { + "@id": "#other_material/166", + "name": "Library 9", + "type": "library_name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_486" + }, + "value": { + "annotationValue": "Library title 9", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_496" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_497" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_498" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/164" + } + ] + }, + { + "@id": "#other_material/167", + "name": "Library 10", + "type": "library_name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_486" + }, + "value": { + "annotationValue": "Library title 10", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_496" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_497" + }, + "value": { + "annotationValue": "", + "termSource": "", + 
"termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_498" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/165" + } + ] + } + ] + }, + "processSequence": [ + { + "@id": "#process/library_construction/166", + "name": "", + "executesProtocol": { + "@id": "#protocol/_31" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/485" + }, + "value": { + "annotationValue": "My library construction protocol 9", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/487" + }, + "value": { + "annotationValue": "Library design description 9", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/489" + }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/490" + }, + "value": { + "annotationValue": "EST", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/491" + }, + "value": { + "annotationValue": "MNase", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/492" + }, + "value": { + "annotationValue": "SINGLE", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": 
"#parameter/493" + }, + "value": { + "annotationValue": "987", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/494" + }, + "value": { + "annotationValue": "Themo Fisher Scientific", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/495" + }, + "value": { + "annotationValue": "AB 3500xL Genetic Analyzer", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/sample_collection/166" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/166" + }, + "inputs": [ + { + "@id": "#sample/164" + } + ], + "outputs": [ + { + "@id": "#sample/166" + } + ] + }, + { + "@id": "#process/library_construction/167", + "name": "", + "executesProtocol": { + "@id": "#protocol/_31" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/485" + }, + "value": { + "annotationValue": "My library construction protocol 10", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/487" + }, + "value": { + "annotationValue": "Library design description 10", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/489" + }, + "value": { + "annotationValue": "SYNTHETIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/490" + }, + "value": { + "annotationValue": "Hi-C", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + 
"termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/491" + }, + "value": { + "annotationValue": "Oligo-dT", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/492" + }, + "value": { + "annotationValue": "PAIRED", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/493" + }, + "value": { + "annotationValue": "876", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/494" + }, + "value": { + "annotationValue": "LS454", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/495" + }, + "value": { + "annotationValue": "454 GS FLX", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/sample_collection/167" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/167" + }, + "inputs": [ + { + "@id": "#sample/165" + } + ], + "outputs": [ + { + "@id": "#sample/167" + } + ] + }, + { + "@id": "#process/nucleic_acid_sequencing/168", + "name": "", + "executesProtocol": { + "@id": "#protocol/_32" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/library_construction/168" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/166" + } + ], + "outputs": [ + { + "@id": "#sample/168" + } + ] + }, + { + "@id": "#process/nucleic_acid_sequencing/169", + "name": "", + "executesProtocol": { + "@id": "#protocol/_32" + }, + "parameterValues": [], + "performer": "", 
+ "date": "", + "previousProcess": { + "@id": "#process/library_construction/169" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/167" + } + ], + "outputs": [ + { + "@id": "#sample/169" + } + ] + } + ], + "unitCategories": [] + } + ], + "factors": [], + "unitCategories": [] + } + ] +} \ No newline at end of file diff --git a/tests/test_data/bad_investigation_isa_json.json b/tests/test_data/bad_investigation_isa_json.json new file mode 100644 index 0000000..528c60a --- /dev/null +++ b/tests/test_data/bad_investigation_isa_json.json @@ -0,0 +1,55 @@ +{ + "identifier": "", + "title": "Test Investigation original", + "description": "This investigation uses the Standard ENA templates.", + "submissionDate": "", + "publicReleaseDate": "", + "ontologySourceReferences": [], + "filename": "Test Investigation original.txt", + "comments": [ + { + "name": "ISAjson export time", + "value": "2023-10-13T06:24:54Z" + }, + { + "name": "SEEK Project name", + "value": "Test Project ENA upload Tool" + }, + { + "name": "SEEK Project ID", + "value": "http://localhost:3000/single_pages/16" + }, + { + "name": "SEEK Investigation ID", + "value": "19" + } + ], + "publications": [], + "people": [ + { + "@id": "#people/4", + "lastName": "De Pelseneer", + "firstName": "Kevin", + "midInitials": "", + "email": "kevin.depelseneer@psb.ugent.be", + "phone": "", + "fax": "", + "address": "", + "affiliation": "", + "roles": [ + { + "termAccession": "", + "termSource": "", + "annotationValue": "" + } + ], + "comments": [ + { + "@id": "", + "value": "", + "name": "" + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/test_data/isa_json_test_investigation.json b/tests/test_data/isa_json_test_investigation.json deleted file mode 100644 index 888fb44..0000000 --- a/tests/test_data/isa_json_test_investigation.json +++ /dev/null @@ -1,2536 +0,0 @@ -{ - "identifier": "", - "title": "ENA upload investigation", - "description": "", - "submissionDate": "", - 
"publicReleaseDate": "", - "ontologySourceReferences": [], - "filename": "ENA upload investigation.txt", - "comments": [ - { - "name": "ISAjson export time", - "value": "2023-09-25T09:47:35Z" - }, - { - "name": "SEEK Project name", - "value": "ENA Upload Project" - }, - { - "name": "SEEK Project ID", - "value": "http://localhost:3000/single_pages/3" - }, - { - "name": "SEEK Investigation ID", - "value": "2" - } - ], - "publications": [], - "people": [ - { - "@id": "#people/1", - "lastName": "De Pelseneer", - "firstName": "Kevin", - "midInitials": "", - "email": "jklhndvkjdsbvs@mail.com", - "phone": "", - "fax": "", - "address": "", - "affiliation": "", - "roles": [ - { - "termAccession": "", - "termSource": "", - "annotationValue": "" - } - ], - "comments": [ - { - "@id": "", - "value": "", - "name": "" - } - ] - } - ], - "studies": [ - { - "identifier": "", - "title": "ENA Upload Study", - "description": "", - "submissionDate": "", - "publicReleaseDate": "", - "filename": "ENA Upload Study.txt", - "comments": [ - { - "name": "SEEK Study ID", - "value": "2" - }, - { - "name": "SEEK creation date", - "value": "2023-09-22T06:14:34Z" - } - ], - "publications": [ - { - "id": 1, - "pubmed_id": 666, - "title": "The number of the beast", - "abstract": "", - "published_date": "2023-09-22", - "journal": "", - "first_letter": "T", - "contributor_id": 1, - "created_at": "2023-09-22T06:10:30.000Z", - "updated_at": "2023-09-22T06:10:30.000Z", - "doi": null, - "uuid": "a800ee30-3b3c-013c-36ae-2c91a17e7bb9", - "policy_id": 43, - "citation": "", - "deleted_contributor": null, - "registered_mode": 3, - "booktitle": "", - "publisher": "", - "editor": "yo mama", - "publication_type_id": 1, - "url": "", - "version": 1, - "license": null, - "other_creators": null - } - ], - "people": [ - { - "@id": "#people/1", - "lastName": "De Pelseneer", - "firstName": "Kevin", - "midInitials": "", - "email": "jklhndvkjdsbvs@mail.com", - "phone": "", - "fax": "", - "address": "", - "affiliation": 
"", - "roles": [ - { - "termAccession": "", - "termSource": "", - "annotationValue": "" - } - ], - "comments": [ - { - "@id": "", - "value": "", - "name": "" - } - ] - } - ], - "studyDesignDescriptors": [], - "characteristicCategories": [ - { - "@id": "#characteristic_category/title_62", - "characteristicType": { - "annotationValue": "title", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#characteristic_category/description_63", - "characteristicType": { - "annotationValue": "description", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#characteristic_category/taxon_id_64", - "characteristicType": { - "annotationValue": "taxon_id", - "termAccession": "", - "termSource": "" - } - } - ], - "materials": { - "sources": [ - { - "@id": "#source/13", - "name": "Source 1", - "characteristics": [ - { - "category": { - "@id": "#characteristic_category/title_62" - }, - "value": { - "annotationValue": "Source awsome dragon 1", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/description_63" - }, - "value": { - "annotationValue": "This is my first source. 
Be gentle!", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/taxon_id_64" - }, - "value": { - "annotationValue": "103695", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - }, - { - "@id": "#source/14", - "name": "Source 2", - "characteristics": [ - { - "category": { - "@id": "#characteristic_category/title_62" - }, - "value": { - "annotationValue": "Source awsome dragon 2", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/description_63" - }, - "value": { - "annotationValue": "This is the second source. This is my control group!", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/taxon_id_64" - }, - "value": { - "annotationValue": "103695", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - }, - { - "@id": "#source/15", - "name": "Source 3", - "characteristics": [ - { - "category": { - "@id": "#characteristic_category/title_62" - }, - "value": { - "annotationValue": "Source awsome dragon 3", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/description_63" - }, - "value": { - "annotationValue": "This is the third source. Meh! 
I have two others...", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/taxon_id_64" - }, - "value": { - "annotationValue": "103695", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - } - ], - "samples": [ - { - "@id": "#sample/16", - "name": "Dragon heart source 1 - A", - "derivesFrom": [ - { - "@id": "#source/13" - } - ], - "characteristics": [], - "factorValues": [ - { - "category": { - "@id": "" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - }, - { - "@id": "#sample/17", - "name": "Dragon heart source 2 - A", - "derivesFrom": [ - { - "@id": "#source/14" - } - ], - "characteristics": [], - "factorValues": [ - { - "category": { - "@id": "" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - }, - { - "@id": "#sample/18", - "name": "Dragon heart source 3 - A", - "derivesFrom": [ - { - "@id": "#source/15" - } - ], - "characteristics": [], - "factorValues": [ - { - "category": { - "@id": "" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - }, - { - "@id": "#sample/19", - "name": "Dragon brain source 1 - A", - "derivesFrom": [ - { - "@id": "#source/13" - } - ], - "characteristics": [], - "factorValues": [ - { - "category": { - "@id": "" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - }, - { - "@id": "#sample/20", - "name": "Dragon brain source 2 - A", - 
"derivesFrom": [ - { - "@id": "#source/14" - } - ], - "characteristics": [], - "factorValues": [ - { - "category": { - "@id": "" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - }, - { - "@id": "#sample/21", - "name": "Dragon brain source 3 - A", - "derivesFrom": [ - { - "@id": "#source/15" - } - ], - "characteristics": [], - "factorValues": [ - { - "category": { - "@id": "" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - } - ] - }, - "protocols": [ - { - "@id": "#protocol/_2", - "name": "sample collection", - "protocolType": { - "annotationValue": "sample collection", - "termAccession": "", - "termSource": "" - }, - "description": "", - "uri": "", - "version": "", - "parameters": [], - "components": [ - { - "componentName": "", - "componentType": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - } - } - ] - }, - { - "@id": "#protocol/_9", - "name": "library construction", - "protocolType": { - "annotationValue": "library construction", - "termAccession": "", - "termSource": "" - }, - "description": "", - "uri": "", - "version": "", - "parameters": [ - { - "@id": "#parameter/70", - "parameterName": { - "annotationValue": "library_construction_protocol", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#parameter/72", - "parameterName": { - "annotationValue": "design_description", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#parameter/73", - "parameterName": { - "annotationValue": "library_source", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#parameter/74", - "parameterName": { - "annotationValue": "library_strategy", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#parameter/75", - "parameterName": { - "annotationValue": 
"library_selection", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#parameter/76", - "parameterName": { - "annotationValue": "library_layout", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#parameter/77", - "parameterName": { - "annotationValue": "insert_size", - "termAccession": "", - "termSource": "" - } - } - ], - "components": [ - { - "componentName": "", - "componentType": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - } - } - ] - }, - { - "@id": "#protocol/_10", - "name": "nucleic acid sequencing", - "protocolType": { - "annotationValue": "nucleic acid sequencing", - "termAccession": "", - "termSource": "" - }, - "description": "", - "uri": "", - "version": "", - "parameters": [ - { - "@id": "#parameter/84", - "parameterName": { - "annotationValue": "platform", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#parameter/85", - "parameterName": { - "annotationValue": "instrument_model", - "termAccession": "", - "termSource": "" - } - } - ], - "components": [ - { - "componentName": "", - "componentType": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - } - } - ] - } - ], - "processSequence": [ - { - "@id": "#process/sample_collection/16", - "name": "", - "executesProtocol": { - "@id": "#protocol/_2" - }, - "parameterValues": [], - "performer": "", - "date": "", - "previousProcess": {}, - "nextProcess": {}, - "inputs": [ - { - "@id": "#source/13" - } - ], - "outputs": [ - { - "@id": "#sample/16" - } - ] - }, - { - "@id": "#process/sample_collection/17", - "name": "", - "executesProtocol": { - "@id": "#protocol/_2" - }, - "parameterValues": [], - "performer": "", - "date": "", - "previousProcess": {}, - "nextProcess": {}, - "inputs": [ - { - "@id": "#source/14" - } - ], - "outputs": [ - { - "@id": "#sample/17" - } - ] - }, - { - "@id": "#process/sample_collection/18", - "name": "", - "executesProtocol": { - "@id": "#protocol/_2" - }, - "parameterValues": [], - 
"performer": "", - "date": "", - "previousProcess": {}, - "nextProcess": {}, - "inputs": [ - { - "@id": "#source/15" - } - ], - "outputs": [ - { - "@id": "#sample/18" - } - ] - }, - { - "@id": "#process/sample_collection/19", - "name": "", - "executesProtocol": { - "@id": "#protocol/_2" - }, - "parameterValues": [], - "performer": "", - "date": "", - "previousProcess": {}, - "nextProcess": {}, - "inputs": [ - { - "@id": "#source/13" - } - ], - "outputs": [ - { - "@id": "#sample/19" - } - ] - }, - { - "@id": "#process/sample_collection/20", - "name": "", - "executesProtocol": { - "@id": "#protocol/_2" - }, - "parameterValues": [], - "performer": "", - "date": "", - "previousProcess": {}, - "nextProcess": {}, - "inputs": [ - { - "@id": "#source/14" - } - ], - "outputs": [ - { - "@id": "#sample/20" - } - ] - }, - { - "@id": "#process/sample_collection/21", - "name": "", - "executesProtocol": { - "@id": "#protocol/_2" - }, - "parameterValues": [], - "performer": "", - "date": "", - "previousProcess": {}, - "nextProcess": {}, - "inputs": [ - { - "@id": "#source/15" - } - ], - "outputs": [ - { - "@id": "#sample/21" - } - ] - } - ], - "assays": [ - { - "@id": "#assay/9_10", - "filename": "a_assays.txt", - "measurementType": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "technologyType": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "technologyPlatform": "", - "characteristicCategories": [ - { - "@id": "#characteristic_category/title_71", - "characteristicType": { - "annotationValue": "title", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#characteristic_category/submission_date_79", - "characteristicType": { - "annotationValue": "submission date", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#characteristic_category/status_80", - "characteristicType": { - "annotationValue": "status", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": 
"#characteristic_category/accession_81", - "characteristicType": { - "annotationValue": "accession", - "termAccession": "", - "termSource": "" - } - } - ], - "materials": { - "samples": [], - "otherMaterials": [ - { - "@id": "#other_material/34", - "name": "Lib Heart 1 - A", - "type": "library_name", - "characteristics": [ - { - "category": { - "@id": "#characteristic_category/title_71" - }, - "value": { - "annotationValue": "Lib Title Heart 1 - A", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/submission_date_79" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/status_80" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/accession_81" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "derivesFrom": [ - { - "@id": "#sample/16" - } - ] - }, - { - "@id": "#other_material/37", - "name": "Lib Heart 2 - A", - "type": "library_name", - "characteristics": [ - { - "category": { - "@id": "#characteristic_category/title_71" - }, - "value": { - "annotationValue": "Lib Title Heart 2 - A", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/submission_date_79" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - 
"category": { - "@id": "#characteristic_category/status_80" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/accession_81" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "derivesFrom": [ - { - "@id": "#sample/17" - } - ] - }, - { - "@id": "#other_material/40", - "name": "Lib Heart 3 - A", - "type": "library_name", - "characteristics": [ - { - "category": { - "@id": "#characteristic_category/title_71" - }, - "value": { - "annotationValue": "Lib Title Heart 3 - A", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/submission_date_79" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/status_80" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/accession_81" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "derivesFrom": [ - { - "@id": "#sample/18" - } - ] - }, - { - "@id": "#other_material/43", - "name": "Lib Brain 1 - A", - "type": "library_name", - "characteristics": [ - { - "category": { - "@id": "#characteristic_category/title_71" - }, - "value": { - "annotationValue": "Lib Title Brain 1 - A", - "termSource": "", - "termAccession": "" - }, - "unit": { - 
"termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/submission_date_79" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/status_80" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/accession_81" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "derivesFrom": [ - { - "@id": "#sample/19" - } - ] - }, - { - "@id": "#other_material/46", - "name": "Lib Brain 2 - A", - "type": "library_name", - "characteristics": [ - { - "category": { - "@id": "#characteristic_category/title_71" - }, - "value": { - "annotationValue": "Lib Title Brain 2 - A", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/submission_date_79" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/status_80" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/accession_81" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "derivesFrom": [ - { - "@id": "#sample/20" - } - 
] - }, - { - "@id": "#other_material/49", - "name": "Lib Brain 3 - A", - "type": "library_name", - "characteristics": [ - { - "category": { - "@id": "#characteristic_category/title_71" - }, - "value": { - "annotationValue": "Lib Title Brain 3 - A", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/submission_date_79" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/status_80" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/accession_81" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "derivesFrom": [ - { - "@id": "#sample/21" - } - ] - } - ] - }, - "processSequence": [ - { - "@id": "#process/library_construction/34", - "name": "", - "executesProtocol": { - "@id": "#protocol/_9" - }, - "parameterValues": [ - { - "category": { - "@id": "#parameter/70" - }, - "value": { - "annotationValue": "My super awesome protocol", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/72" - }, - "value": { - "annotationValue": "Lib description Heart 1 - A", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/73" - }, - "value": { - "annotationValue": "GENOMIC", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - 
"termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/74" - }, - "value": { - "annotationValue": "WGS", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/75" - }, - "value": { - "annotationValue": "PCR", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/76" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/77" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { - "@id": "#process/sample_collection/34" - }, - "nextProcess": { - "@id": "#process/nucleic_acid_sequencing/34" - }, - "inputs": [ - { - "@id": "#sample/16" - } - ], - "outputs": [ - { - "@id": "#sample/34" - } - ] - }, - { - "@id": "#process/library_construction/37", - "name": "", - "executesProtocol": { - "@id": "#protocol/_9" - }, - "parameterValues": [ - { - "category": { - "@id": "#parameter/70" - }, - "value": { - "annotationValue": "My super awesome protocol", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/72" - }, - "value": { - "annotationValue": "Lib description Heart 2 - A", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/73" - }, - "value": { - "annotationValue": "GENOMIC", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - 
"termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/74" - }, - "value": { - "annotationValue": "WGS", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/75" - }, - "value": { - "annotationValue": "PCR", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/76" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/77" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { - "@id": "#process/sample_collection/37" - }, - "nextProcess": { - "@id": "#process/nucleic_acid_sequencing/37" - }, - "inputs": [ - { - "@id": "#sample/17" - } - ], - "outputs": [ - { - "@id": "#sample/37" - } - ] - }, - { - "@id": "#process/library_construction/40", - "name": "", - "executesProtocol": { - "@id": "#protocol/_9" - }, - "parameterValues": [ - { - "category": { - "@id": "#parameter/70" - }, - "value": { - "annotationValue": "My super awesome protocol", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/72" - }, - "value": { - "annotationValue": "Lib description Heart 3 - A", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/73" - }, - "value": { - "annotationValue": "GENOMIC", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - 
"termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/74" - }, - "value": { - "annotationValue": "WGS", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/75" - }, - "value": { - "annotationValue": "PCR", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/76" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/77" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { - "@id": "#process/sample_collection/40" - }, - "nextProcess": { - "@id": "#process/nucleic_acid_sequencing/40" - }, - "inputs": [ - { - "@id": "#sample/18" - } - ], - "outputs": [ - { - "@id": "#sample/40" - } - ] - }, - { - "@id": "#process/library_construction/43", - "name": "", - "executesProtocol": { - "@id": "#protocol/_9" - }, - "parameterValues": [ - { - "category": { - "@id": "#parameter/70" - }, - "value": { - "annotationValue": "My super awesome protocol", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/72" - }, - "value": { - "annotationValue": "Lib description Brain 1 - A", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/73" - }, - "value": { - "annotationValue": "GENOMIC", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - 
"termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/74" - }, - "value": { - "annotationValue": "WGS", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/75" - }, - "value": { - "annotationValue": "PCR", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/76" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/77" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { - "@id": "#process/sample_collection/43" - }, - "nextProcess": { - "@id": "#process/nucleic_acid_sequencing/43" - }, - "inputs": [ - { - "@id": "#sample/19" - } - ], - "outputs": [ - { - "@id": "#sample/43" - } - ] - }, - { - "@id": "#process/library_construction/46", - "name": "", - "executesProtocol": { - "@id": "#protocol/_9" - }, - "parameterValues": [ - { - "category": { - "@id": "#parameter/70" - }, - "value": { - "annotationValue": "My super awesome protocol", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/72" - }, - "value": { - "annotationValue": "Lib description Brain 2 - A", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/73" - }, - "value": { - "annotationValue": "GENOMIC", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - 
"termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/74" - }, - "value": { - "annotationValue": "WGS", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/75" - }, - "value": { - "annotationValue": "PCR", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/76" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/77" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { - "@id": "#process/sample_collection/46" - }, - "nextProcess": { - "@id": "#process/nucleic_acid_sequencing/46" - }, - "inputs": [ - { - "@id": "#sample/20" - } - ], - "outputs": [ - { - "@id": "#sample/46" - } - ] - }, - { - "@id": "#process/library_construction/49", - "name": "", - "executesProtocol": { - "@id": "#protocol/_9" - }, - "parameterValues": [ - { - "category": { - "@id": "#parameter/70" - }, - "value": { - "annotationValue": "My super awesome protocol", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/72" - }, - "value": { - "annotationValue": "Lib description Brain 3 - A", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/73" - }, - "value": { - "annotationValue": "GENOMIC", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - 
"termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/74" - }, - "value": { - "annotationValue": "WGS", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/75" - }, - "value": { - "annotationValue": "PCR", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/76" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/77" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { - "@id": "#process/sample_collection/49" - }, - "nextProcess": { - "@id": "#process/nucleic_acid_sequencing/49" - }, - "inputs": [ - { - "@id": "#sample/21" - } - ], - "outputs": [ - { - "@id": "#sample/49" - } - ] - }, - { - "@id": "#process/nucleic_acid_sequencing/52", - "name": "", - "executesProtocol": { - "@id": "#protocol/_10" - }, - "parameterValues": [ - { - "category": { - "@id": "#parameter/84" - }, - "value": { - "annotationValue": "Illumina", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/85" - }, - "value": { - "annotationValue": "Illumina NovaSeq 6000", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { - "@id": "#process/library_construction/52" - }, - "nextProcess": {}, - "inputs": [ - { - "@id": "#other_material/34" - } - ], - 
"outputs": [ - { - "@id": "#sample/52" - } - ] - }, - { - "@id": "#process/nucleic_acid_sequencing/53", - "name": "", - "executesProtocol": { - "@id": "#protocol/_10" - }, - "parameterValues": [ - { - "category": { - "@id": "#parameter/84" - }, - "value": { - "annotationValue": "Illumina", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/85" - }, - "value": { - "annotationValue": "Illumina NovaSeq 6000", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { - "@id": "#process/library_construction/53" - }, - "nextProcess": {}, - "inputs": [ - { - "@id": "#other_material/37" - } - ], - "outputs": [ - { - "@id": "#sample/53" - } - ] - }, - { - "@id": "#process/nucleic_acid_sequencing/54", - "name": "", - "executesProtocol": { - "@id": "#protocol/_10" - }, - "parameterValues": [ - { - "category": { - "@id": "#parameter/84" - }, - "value": { - "annotationValue": "Illumina", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/85" - }, - "value": { - "annotationValue": "Illumina NovaSeq 6000", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { - "@id": "#process/library_construction/54" - }, - "nextProcess": {}, - "inputs": [ - { - "@id": "#other_material/40" - } - ], - "outputs": [ - { - "@id": "#sample/54" - } - ] - }, - { - "@id": "#process/nucleic_acid_sequencing/55", - "name": "", - "executesProtocol": { - "@id": "#protocol/_10" - }, - "parameterValues": [ - { - "category": { - "@id": "#parameter/84" - }, - "value": { - "annotationValue": "Illumina", - "termSource": 
"", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/85" - }, - "value": { - "annotationValue": "Illumina NovaSeq 6000", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { - "@id": "#process/library_construction/55" - }, - "nextProcess": {}, - "inputs": [ - { - "@id": "#other_material/43" - } - ], - "outputs": [ - { - "@id": "#sample/55" - } - ] - }, - { - "@id": "#process/nucleic_acid_sequencing/56", - "name": "", - "executesProtocol": { - "@id": "#protocol/_10" - }, - "parameterValues": [ - { - "category": { - "@id": "#parameter/84" - }, - "value": { - "annotationValue": "Illumina", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/85" - }, - "value": { - "annotationValue": "Illumina NovaSeq 6000", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { - "@id": "#process/library_construction/56" - }, - "nextProcess": {}, - "inputs": [ - { - "@id": "#other_material/46" - } - ], - "outputs": [ - { - "@id": "#sample/56" - } - ] - }, - { - "@id": "#process/nucleic_acid_sequencing/57", - "name": "", - "executesProtocol": { - "@id": "#protocol/_10" - }, - "parameterValues": [ - { - "category": { - "@id": "#parameter/84" - }, - "value": { - "annotationValue": "Illumina", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/85" - }, - "value": { - "annotationValue": "Illumina NovaSeq 6000", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - 
"termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { - "@id": "#process/library_construction/57" - }, - "nextProcess": {}, - "inputs": [ - { - "@id": "#other_material/49" - } - ], - "outputs": [ - { - "@id": "#sample/57" - } - ] - } - ], - "dataFiles": [ - { - "@id": "#data_file/52", - "name": "Datafile Dragon Heart 1.fastq", - "type": "Raw Data File", - "comments": [ - { - "name": "file_type", - "value": "fastq" - }, - { - "name": "file checksum", - "value": "" - }, - { - "name": "submission date", - "value": "" - }, - { - "name": "status", - "value": "" - }, - { - "name": "accession", - "value": "" - } - ] - }, - { - "@id": "#data_file/53", - "name": "Datafile Dragon Heart 2.fastq", - "type": "Raw Data File", - "comments": [ - { - "name": "file_type", - "value": "fastq" - }, - { - "name": "file checksum", - "value": "" - }, - { - "name": "submission date", - "value": "" - }, - { - "name": "status", - "value": "" - }, - { - "name": "accession", - "value": "" - } - ] - }, - { - "@id": "#data_file/54", - "name": "Datafile Dragon Heart 3.fastq", - "type": "Raw Data File", - "comments": [ - { - "name": "file_type", - "value": "fastq" - }, - { - "name": "file checksum", - "value": "" - }, - { - "name": "submission date", - "value": "" - }, - { - "name": "status", - "value": "" - }, - { - "name": "accession", - "value": "" - } - ] - }, - { - "@id": "#data_file/55", - "name": "Datafile Dragon Brain 1.fastq", - "type": "Raw Data File", - "comments": [ - { - "name": "file_type", - "value": "fastq" - }, - { - "name": "file checksum", - "value": "" - }, - { - "name": "submission date", - "value": "" - }, - { - "name": "status", - "value": "" - }, - { - "name": "accession", - "value": "" - } - ] - }, - { - "@id": "#data_file/56", - "name": "Datafile Dragon Brain 2.fastq", - "type": "Raw Data File", - "comments": [ - { - "name": "file_type", - "value": "fastq" - }, - { - "name": "file checksum", - "value": "" - }, - { - 
"name": "submission date", - "value": "" - }, - { - "name": "status", - "value": "" - }, - { - "name": "accession", - "value": "" - } - ] - }, - { - "@id": "#data_file/57", - "name": "Datafile Dragon Brain 3.fastq", - "type": "Raw Data File", - "comments": [ - { - "name": "file_type", - "value": "fastq" - }, - { - "name": "file checksum", - "value": "" - }, - { - "name": "submission date", - "value": "" - }, - { - "name": "status", - "value": "" - }, - { - "name": "accession", - "value": "" - } - ] - } - ], - "unitCategories": [] - } - ], - "factors": [], - "unitCategories": [] - } - ] -} \ No newline at end of file diff --git a/tests/test_data/multi_study_multi_assay_stream_investigation.json b/tests/test_data/multi_study_multi_assay_stream_investigation.json new file mode 100644 index 0000000..5f47bb0 --- /dev/null +++ b/tests/test_data/multi_study_multi_assay_stream_investigation.json @@ -0,0 +1,5131 @@ +{ + "identifier": "", + "title": "Test Investigation original", + "description": "This investigation uses the Standard ENA templates.", + "submissionDate": "", + "publicReleaseDate": "", + "ontologySourceReferences": [], + "filename": "Test Investigation original.txt", + "comments": [ + { + "name": "ISAjson export time", + "value": "2023-10-13T06:24:54Z" + }, + { + "name": "SEEK Project name", + "value": "Test Project ENA upload Tool" + }, + { + "name": "SEEK Project ID", + "value": "http://localhost:3000/single_pages/16" + }, + { + "name": "SEEK Investigation ID", + "value": "19" + } + ], + "publications": [], + "people": [ + { + "@id": "#people/4", + "lastName": "De Pelseneer", + "firstName": "Kevin", + "midInitials": "", + "email": "kevin.depelseneer@psb.ugent.be", + "phone": "", + "fax": "", + "address": "", + "affiliation": "", + "roles": [ + { + "termAccession": "", + "termSource": "", + "annotationValue": "" + } + ], + "comments": [ + { + "@id": "", + "value": "", + "name": "" + } + ] + } + ], + "studies": [ + { + "identifier": "", + "title": "ENA Study 
1", + "description": "This is the first ENA Study.", + "submissionDate": "", + "publicReleaseDate": "", + "filename": "ENA Study 1.txt", + "comments": [ + { + "@id": "#study_comment/19_8_49", + "name": "ena_sample_alias_prefix", + "value": "https://datahub.elixir-belgium.org/samples/" + }, + { + "@id": "#study_comment/19_255b7d70-4bbf-013c-6732-7a163e608de1", + "name": "SEEK Study ID", + "value": "19" + }, + { + "@id": "#study_comment/19_255b8560-4bbf-013c-6733-7a163e608de1", + "name": "SEEK creation date", + "value": "2023-10-09T13:39:46Z" + } + ], + "publications": [ + { + "pubMedID": 5, + "doi": "", + "status": { + "termAccession": "", + "termSource": "", + "annotationValue": "" + }, + "title": "Test publication 1", + "author_list": "", + "comments": [ + { + "@id": "#publication_comment/1_27", + "name": "linked_assays", + "value": "[{\"id\":27,\"title\":\"ENA Library construction - Study 1 - Stream 1\"}]" + }, + { + "@id": "#publication_comment/1_7", + "name": "linked_studies", + "value": "[{\"id\":7,\"title\":\"Test Study in Single Page\"}]" + } + ] + }, + { + "pubMedID": 6, + "doi": "", + "status": { + "termAccession": "", + "termSource": "", + "annotationValue": "" + }, + "title": "Test publication 2", + "author_list": "", + "comments": [ + { + "@id": "#publication_comment/2_29", + "name": "linked_assays", + "value": "[{\"id\":29,\"title\":\"ENA Library construction - Study 1 - Stream 2\"}]" + }, + { + "@id": "#publication_comment/2_", + "name": "linked_studies", + "value": "[]" + } + ] + } + ], + "people": [ + { + "@id": "#people/4", + "lastName": "De Pelseneer", + "firstName": "Kevin", + "midInitials": "", + "email": "kevin.depelseneer@psb.ugent.be", + "phone": "", + "fax": "", + "address": "", + "affiliation": "", + "roles": [ + { + "termAccession": "", + "termSource": "", + "annotationValue": "" + } + ], + "comments": [ + { + "@id": "", + "value": "", + "name": "" + } + ] + } + ], + "studyDesignDescriptors": [], + "characteristicCategories": [ + { + 
"@id": "#characteristic_category/geographic_location_(country_and/or_sea)_410", + "characteristicType": { + "annotationValue": "geographic location (country and/or sea)", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/taxon_id_411", + "characteristicType": { + "annotationValue": "taxon_id", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/title_415", + "characteristicType": { + "annotationValue": "title", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/sample_description_416", + "characteristicType": { + "annotationValue": "sample_description", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/collection_date_417", + "characteristicType": { + "annotationValue": "collection date", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/accession_418", + "characteristicType": { + "annotationValue": "accession", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/submission_date_419", + "characteristicType": { + "annotationValue": "submission date", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/status_420", + "characteristicType": { + "annotationValue": "status", + "termAccession": "", + "termSource": "" + } + } + ], + "materials": { + "sources": [ + { + "@id": "#source/140", + "name": "Source 1", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/geographic_location_(country_and/or_sea)_410" + }, + "value": { + "annotationValue": "Afghanistan", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/taxon_id_411" + }, + "value": { + "annotationValue": "1234", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + 
"termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#source/141", + "name": "Source 2", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/geographic_location_(country_and/or_sea)_410" + }, + "value": { + "annotationValue": "Albania", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/taxon_id_411" + }, + "value": { + "annotationValue": "2345", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + } + ], + "samples": [ + { + "@id": "#sample/142", + "name": "Sample 1", + "derivesFrom": [ + { + "@id": "#source/140" + } + ], + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_415" + }, + "value": { + "annotationValue": "Sample title 1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/sample_description_416" + }, + "value": { + "annotationValue": "Sample description 1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/collection_date_417" + }, + "value": { + "annotationValue": "2023", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_418" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_419" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + 
"termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_420" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "factorValues": [ + { + "category": { + "@id": "" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/143", + "name": "Sample 2", + "derivesFrom": [ + { + "@id": "#source/140" + } + ], + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_415" + }, + "value": { + "annotationValue": "Sample title 2", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/sample_description_416" + }, + "value": { + "annotationValue": "Sample description 2", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/collection_date_417" + }, + "value": { + "annotationValue": "2022", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_418" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_419" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_420" + }, 
+ "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "factorValues": [ + { + "category": { + "@id": "" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/144", + "name": "Sample 3", + "derivesFrom": [ + { + "@id": "#source/141" + } + ], + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_415" + }, + "value": { + "annotationValue": "Sample title 3", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/sample_description_416" + }, + "value": { + "annotationValue": "Sample description 3", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/collection_date_417" + }, + "value": { + "annotationValue": "2021", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_418" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_419" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_420" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": 
"", + "comments": [] + } + } + ], + "factorValues": [ + { + "category": { + "@id": "" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/145", + "name": "Sample 4", + "derivesFrom": [ + { + "@id": "#source/141" + } + ], + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_415" + }, + "value": { + "annotationValue": "Sample title 4", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/sample_description_416" + }, + "value": { + "annotationValue": "Sample description 4", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/collection_date_417" + }, + "value": { + "annotationValue": "2020", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_418" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_419" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_420" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "factorValues": [ + { + "category": { + "@id": "" + }, + "value": { + "annotationValue": "", + 
"termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/196", + "name": "Sample 7", + "derivesFrom": [ + { + "@id": "#source/141" + } + ], + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_415" + }, + "value": { + "annotationValue": "Sample title 7", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/sample_description_416" + }, + "value": { + "annotationValue": "This sample is not used in a ENA assay stream", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/collection_date_417" + }, + "value": { + "annotationValue": "2016", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_418" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_419" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_420" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "factorValues": [ + { + "category": { + "@id": "" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": 
[] + } + } + ] + } + ] + }, + "protocols": [ + { + "@id": "#protocol/3_19", + "name": "sample collection", + "protocolType": { + "annotationValue": "sample collection", + "termAccession": "", + "termSource": "" + }, + "description": "This SOP describes a recipe how to make chocolate chip cookies.", + "uri": "", + "version": "", + "parameters": [], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + }, + { + "@id": "#protocol/_27", + "name": "library construction", + "protocolType": { + "annotationValue": "library construction", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [ + { + "@id": "#parameter/423", + "parameterName": { + "annotationValue": "library_construction_protocol", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/425", + "parameterName": { + "annotationValue": "design_description", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/427", + "parameterName": { + "annotationValue": "library_source", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/428", + "parameterName": { + "annotationValue": "library_strategy", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/429", + "parameterName": { + "annotationValue": "library_selection", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/430", + "parameterName": { + "annotationValue": "library_layout", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/431", + "parameterName": { + "annotationValue": "insert_size", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/432", + "parameterName": { + "annotationValue": "platform", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/433", + "parameterName": { + "annotationValue": "instrument_model", + 
"termAccession": "", + "termSource": "" + } + } + ], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + }, + { + "@id": "#protocol/_28", + "name": "nucleic acid sequencing", + "protocolType": { + "annotationValue": "nucleic acid sequencing", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + }, + { + "@id": "#protocol/_29", + "name": "library construction", + "protocolType": { + "annotationValue": "library construction", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [ + { + "@id": "#parameter/448", + "parameterName": { + "annotationValue": "library_construction_protocol", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/450", + "parameterName": { + "annotationValue": "design_description", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/452", + "parameterName": { + "annotationValue": "library_source", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/453", + "parameterName": { + "annotationValue": "library_strategy", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/454", + "parameterName": { + "annotationValue": "library_selection", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/455", + "parameterName": { + "annotationValue": "library_layout", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/456", + "parameterName": { + "annotationValue": "insert_size", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/457", + "parameterName": { + "annotationValue": "platform", + "termAccession": "", + "termSource": "" + } + 
}, + { + "@id": "#parameter/458", + "parameterName": { + "annotationValue": "instrument_model", + "termAccession": "", + "termSource": "" + } + } + ], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + }, + { + "@id": "#protocol/_30", + "name": "nucleic acid sequencing", + "protocolType": { + "annotationValue": "nucleic acid sequencing", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + } + ], + "processSequence": [ + { + "@id": "#process/sample_collection/142", + "name": "", + "executesProtocol": { + "@id": "#protocol/3_19" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [ + { + "@id": "#source/140" + } + ], + "outputs": [ + { + "@id": "#sample/142" + } + ] + }, + { + "@id": "#process/sample_collection/143", + "name": "", + "executesProtocol": { + "@id": "#protocol/3_19" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [ + { + "@id": "#source/140" + } + ], + "outputs": [ + { + "@id": "#sample/143" + } + ] + }, + { + "@id": "#process/sample_collection/144", + "name": "", + "executesProtocol": { + "@id": "#protocol/3_19" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [ + { + "@id": "#source/141" + } + ], + "outputs": [ + { + "@id": "#sample/144" + } + ] + }, + { + "@id": "#process/sample_collection/145", + "name": "", + "executesProtocol": { + "@id": "#protocol/3_19" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [ + { + "@id": "#source/141" + } + ], + "outputs": [ + { + 
"@id": "#sample/145" + } + ] + }, + { + "@id": "#process/sample_collection/196", + "name": "", + "executesProtocol": { + "@id": "#protocol/3_19" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [ + { + "@id": "#source/141" + } + ], + "outputs": [ + { + "@id": "#sample/196" + } + ] + } + ], + "assays": [ + { + "@id": "#assay/27_28", + "filename": "a_ena_stream_1.txt", + "measurementType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "technologyType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "comments": [ + { + "@id": "#assay_comment/19_27_28_9_50", + "name": "ena_study_title", + "value": "Ena Study 1" + }, + { + "@id": "#assay_comment/19_27_28_9_51", + "name": "study_type", + "value": "Whole Genome Sequencing" + }, + { + "@id": "#assay_comment/19_27_28_9_52", + "name": "new_study_type", + "value": "This should not be submitted!" + }, + { + "@id": "#assay_comment/19_27_28_9_53", + "name": "ena_study_abstract", + "value": "This is Ena Study 1." 
+ }, + { + "@id": "#assay_comment/19_27_28_9_54", + "name": "assay_stream", + "value": "Ena stream 1" + }, + { + "@id": "#assay_comment/19_27_28_9_55", + "name": "ena_study_alias_prefix", + "value": "https://datahub.elixir-belgium.org/studies/" + }, + { + "@id": "#assay_comment/19_27_28_9_56", + "name": "ena_experiment_alias_prefix", + "value": "https://datahub.elixir-belgium.org/samples/" + }, + { + "@id": "#assay_comment/19_27_28_9_57", + "name": "ena_run_alias_prefix", + "value": "https://datahub.elixir-belgium.org/samples/" + }, + { + "@id": "#assay_comment/27_28", + "name": "linked_assays", + "value": "[{\"id\":27,\"title\":\"ENA Library construction - Study 1 - Stream 1\"},{\"id\":28,\"title\":\"ENA DNA Sequencing - Study 1 - Stream 1\"}]" + } + ], + "technologyPlatform": "", + "characteristicCategories": [ + { + "@id": "#characteristic_category/title_424", + "characteristicType": { + "annotationValue": "title", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/accession_434", + "characteristicType": { + "annotationValue": "accession", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/submission_date_435", + "characteristicType": { + "annotationValue": "submission date", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/status_436", + "characteristicType": { + "annotationValue": "status", + "termAccession": "", + "termSource": "" + } + } + ], + "materials": { + "samples": [], + "otherMaterials": [ + { + "@id": "#other_material/146", + "name": "Library 1", + "type": "library_name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_424" + }, + "value": { + "annotationValue": "Library title 1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_434" + }, + "value": { + 
"annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_435" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_436" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/142" + } + ] + }, + { + "@id": "#other_material/147", + "name": "Library 2", + "type": "library_name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_424" + }, + "value": { + "annotationValue": "Library title 2", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_434" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_435" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_436" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/143" + } + ] + }, + { + "@id": "#other_material/148", + "name": "Library 3", + "type": "library_name", + "characteristics": [ + { + "category": { + "@id": 
"#characteristic_category/title_424" + }, + "value": { + "annotationValue": "Library title 3", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_434" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_435" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_436" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/144" + } + ] + }, + { + "@id": "#other_material/149", + "name": "Library 4", + "type": "library_name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_424" + }, + "value": { + "annotationValue": "Library title 4", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_434" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_435" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_436" + }, + "value": { + "annotationValue": "", + "termSource": "", + 
"termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/145" + } + ] + } + ] + }, + "processSequence": [ + { + "@id": "#process/library_construction/146", + "name": "", + "executesProtocol": { + "@id": "#protocol/_27" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/423" + }, + "value": { + "annotationValue": "My special protocol 1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/425" + }, + "value": { + "annotationValue": "Library description 1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/427" + }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/428" + }, + "value": { + "annotationValue": "WGS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/429" + }, + "value": { + "annotationValue": "RANDOM", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/430" + }, + "value": { + "annotationValue": "SINGLE", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/431" + }, + "value": { + "annotationValue": "123", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/432" + }, + "value": { + 
"annotationValue": "LS454", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/433" + }, + "value": { + "annotationValue": "454 GS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/sample_collection/146" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/146" + }, + "inputs": [ + { + "@id": "#sample/142" + } + ], + "outputs": [ + { + "@id": "#sample/146" + } + ] + }, + { + "@id": "#process/library_construction/147", + "name": "", + "executesProtocol": { + "@id": "#protocol/_27" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/423" + }, + "value": { + "annotationValue": "My special protocol 2", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/425" + }, + "value": { + "annotationValue": "Library description 2", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/427" + }, + "value": { + "annotationValue": "GENOMIC SINGLE CELL", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/428" + }, + "value": { + "annotationValue": "WGA", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/429" + }, + "value": { + "annotationValue": "PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": 
"#parameter/430" + }, + "value": { + "annotationValue": "PAIRED", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/431" + }, + "value": { + "annotationValue": "234", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/432" + }, + "value": { + "annotationValue": "Illumina", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/433" + }, + "value": { + "annotationValue": "Illumina Genome Analyzer", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/sample_collection/147" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/147" + }, + "inputs": [ + { + "@id": "#sample/143" + } + ], + "outputs": [ + { + "@id": "#sample/147" + } + ] + }, + { + "@id": "#process/library_construction/148", + "name": "", + "executesProtocol": { + "@id": "#protocol/_27" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/423" + }, + "value": { + "annotationValue": "My special protocol 3", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/425" + }, + "value": { + "annotationValue": "Library description 3", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/427" + }, + "value": { + "annotationValue": "TRANSCRIPTOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": 
[] + } + }, + { + "category": { + "@id": "#parameter/428" + }, + "value": { + "annotationValue": "WXS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/429" + }, + "value": { + "annotationValue": "RANDOM PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/430" + }, + "value": { + "annotationValue": "SINGLE", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/431" + }, + "value": { + "annotationValue": "345", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/432" + }, + "value": { + "annotationValue": "PacBio", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/433" + }, + "value": { + "annotationValue": "PacBio RS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/sample_collection/148" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/148" + }, + "inputs": [ + { + "@id": "#sample/144" + } + ], + "outputs": [ + { + "@id": "#sample/148" + } + ] + }, + { + "@id": "#process/library_construction/149", + "name": "", + "executesProtocol": { + "@id": "#protocol/_27" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/423" + }, + "value": { + "annotationValue": "My special protocol 4", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + 
"comments": [] + } + }, + { + "category": { + "@id": "#parameter/425" + }, + "value": { + "annotationValue": "Library description 4", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/427" + }, + "value": { + "annotationValue": "TRANSCRIPTOMIC SINGLE CELL", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/428" + }, + "value": { + "annotationValue": "RNA-Seq", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/429" + }, + "value": { + "annotationValue": "RT-PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/430" + }, + "value": { + "annotationValue": "PAIRED", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/431" + }, + "value": { + "annotationValue": "456", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/432" + }, + "value": { + "annotationValue": "Themo Fisher Scientific", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/433" + }, + "value": { + "annotationValue": "AB 3730xL Genetic Analyzer", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/sample_collection/149" + }, + 
"nextProcess": { + "@id": "#process/nucleic_acid_sequencing/149" + }, + "inputs": [ + { + "@id": "#sample/145" + } + ], + "outputs": [ + { + "@id": "#sample/149" + } + ] + }, + { + "@id": "#process/nucleic_acid_sequencing/150", + "name": "", + "executesProtocol": { + "@id": "#protocol/_28" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/library_construction/150" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/146" + } + ], + "outputs": [ + { + "@id": "#sample/150" + } + ] + }, + { + "@id": "#process/nucleic_acid_sequencing/151", + "name": "", + "executesProtocol": { + "@id": "#protocol/_28" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/library_construction/151" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/147" + } + ], + "outputs": [ + { + "@id": "#sample/151" + } + ] + }, + { + "@id": "#process/nucleic_acid_sequencing/152", + "name": "", + "executesProtocol": { + "@id": "#protocol/_28" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/library_construction/152" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/148" + } + ], + "outputs": [ + { + "@id": "#sample/152" + } + ] + }, + { + "@id": "#process/nucleic_acid_sequencing/153", + "name": "", + "executesProtocol": { + "@id": "#protocol/_28" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/library_construction/153" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/149" + } + ], + "outputs": [ + { + "@id": "#sample/153" + } + ] + } + ], + "dataFiles": [ + { + "@id": "#data_file/150", + "name": "Raw Data File 1", + "type": "Raw Data File", + "comments": [ + { + "name": "file_name", + "value": "data_file_1.bam" + }, + { + "name": "file_type", + "value": "bam" + }, + { + "name": "file checksum", + "value": "" + 
}, + { + "name": "accession", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + } + ] + }, + { + "@id": "#data_file/151", + "name": "Raw Data File 2", + "type": "Raw Data File", + "comments": [ + { + "name": "file_name", + "value": "data_file_2.cram" + }, + { + "name": "file_type", + "value": "cram" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "accession", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + } + ] + }, + { + "@id": "#data_file/152", + "name": "Raw Data File 3", + "type": "Raw Data File", + "comments": [ + { + "name": "file_name", + "value": "data_file_3.fastq" + }, + { + "name": "file_type", + "value": "fastq" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "accession", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + } + ] + }, + { + "@id": "#data_file/153", + "name": "Raw Data File 4", + "type": "Raw Data File", + "comments": [ + { + "name": "file_name", + "value": "data_file_4.sff" + }, + { + "name": "file_type", + "value": "sff" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "accession", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + } + ] + } + ], + "unitCategories": [] + }, + { + "@id": "#assay/29_30", + "filename": "a_ena_stream_2.txt", + "measurementType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "technologyType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "comments": [ + { + "@id": "#assay_comment/19_29_30_10_50", + "name": "ena_study_title", + "value": "Ena Study 2" + }, + { + "@id": "#assay_comment/19_29_30_10_51", + "name": "study_type", + "value": "Other" + }, + { + "@id": "#assay_comment/19_29_30_10_52", + "name": "new_study_type", + "value": "My special study 
type" + }, + { + "@id": "#assay_comment/19_29_30_10_53", + "name": "ena_study_abstract", + "value": "This is Ena Study 2." + }, + { + "@id": "#assay_comment/19_29_30_10_54", + "name": "assay_stream", + "value": "Ena stream 2" + }, + { + "@id": "#assay_comment/19_29_30_10_55", + "name": "ena_study_alias_prefix", + "value": "https://datahub.elixir-belgium.org/studies/" + }, + { + "@id": "#assay_comment/19_29_30_10_56", + "name": "ena_experiment_alias_prefix", + "value": "https://datahub.elixir-belgium.org/samples/" + }, + { + "@id": "#assay_comment/19_29_30_10_57", + "name": "ena_run_alias_prefix", + "value": "https://datahub.elixir-belgium.org/samples/" + }, + { + "@id": "#assay_comment/29_30", + "name": "linked_assays", + "value": "[{\"id\":29,\"title\":\"ENA Library construction - Study 1 - Stream 2\"},{\"id\":30,\"title\":\"ENA DNA Sequencing - Study 1 - Stream 2\"}]" + } + ], + "technologyPlatform": "", + "characteristicCategories": [ + { + "@id": "#characteristic_category/title_449", + "characteristicType": { + "annotationValue": "title", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/accession_459", + "characteristicType": { + "annotationValue": "accession", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/submission_date_460", + "characteristicType": { + "annotationValue": "submission date", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/status_461", + "characteristicType": { + "annotationValue": "status", + "termAccession": "", + "termSource": "" + } + } + ], + "materials": { + "samples": [], + "otherMaterials": [ + { + "@id": "#other_material/154", + "name": "Library 5", + "type": "library_name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_449" + }, + "value": { + "annotationValue": "Library title 5", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + 
"termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_459" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_460" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_461" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/142" + } + ] + }, + { + "@id": "#other_material/155", + "name": "Library 6", + "type": "library_name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_449" + }, + "value": { + "annotationValue": "Library title 6", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_459" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_460" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_461" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/143" + } + ] + }, + { + "@id": 
"#other_material/156", + "name": "Library 7", + "type": "library_name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_449" + }, + "value": { + "annotationValue": "Library title 7", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_459" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_460" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_461" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/144" + } + ] + }, + { + "@id": "#other_material/157", + "name": "Library 8", + "type": "library_name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_449" + }, + "value": { + "annotationValue": "Library title 8", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_459" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_460" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + 
"category": { + "@id": "#characteristic_category/status_461" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/145" + } + ] + } + ] + }, + "processSequence": [ + { + "@id": "#process/library_construction/154", + "name": "", + "executesProtocol": { + "@id": "#protocol/_29" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/448" + }, + "value": { + "annotationValue": "My library construction protocol 5", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/450" + }, + "value": { + "annotationValue": "Library design description 5", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/452" + }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/453" + }, + "value": { + "annotationValue": "ssRNA-seq", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/454" + }, + "value": { + "annotationValue": "HMPR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/455" + }, + "value": { + "annotationValue": "SINGLE", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/456" + }, + "value": { + "annotationValue": "123", + "termSource": "", + "termAccession": "" + }, + 
"unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/457" + }, + "value": { + "annotationValue": "LS454", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/458" + }, + "value": { + "annotationValue": "454 GS 20", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/sample_collection/154" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/154" + }, + "inputs": [ + { + "@id": "#sample/142" + } + ], + "outputs": [ + { + "@id": "#sample/154" + } + ] + }, + { + "@id": "#process/library_construction/155", + "name": "", + "executesProtocol": { + "@id": "#protocol/_29" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/448" + }, + "value": { + "annotationValue": "My library construction protocol 6", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/450" + }, + "value": { + "annotationValue": "Library design description 6", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/452" + }, + "value": { + "annotationValue": "GENOMIC SINGLE CELL", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/453" + }, + "value": { + "annotationValue": "miRNA-Seq", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/454" + }, + "value": { + "annotationValue": 
"MF", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/455" + }, + "value": { + "annotationValue": "PAIRED", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/456" + }, + "value": { + "annotationValue": "234", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/457" + }, + "value": { + "annotationValue": "Illumina", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/458" + }, + "value": { + "annotationValue": "Illumina Genome Analyzer II", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/sample_collection/155" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/155" + }, + "inputs": [ + { + "@id": "#sample/143" + } + ], + "outputs": [ + { + "@id": "#sample/155" + } + ] + }, + { + "@id": "#process/library_construction/156", + "name": "", + "executesProtocol": { + "@id": "#protocol/_29" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/448" + }, + "value": { + "annotationValue": "My library construction protocol 7", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/450" + }, + "value": { + "annotationValue": "Library design description 7", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": 
"#parameter/452" + }, + "value": { + "annotationValue": "TRANSCRIPTOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/453" + }, + "value": { + "annotationValue": "ncRNA-Seq", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/454" + }, + "value": { + "annotationValue": "size fractionation", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/455" + }, + "value": { + "annotationValue": "SINGLE", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/456" + }, + "value": { + "annotationValue": "345", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/457" + }, + "value": { + "annotationValue": "PacBio", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/458" + }, + "value": { + "annotationValue": "PacBio RS II", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/sample_collection/156" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/156" + }, + "inputs": [ + { + "@id": "#sample/144" + } + ], + "outputs": [ + { + "@id": "#sample/156" + } + ] + }, + { + "@id": "#process/library_construction/157", + "name": "", + "executesProtocol": { + "@id": "#protocol/_29" + }, + "parameterValues": [ + { + 
"category": { + "@id": "#parameter/448" + }, + "value": { + "annotationValue": "My library construction protocol 8", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/450" + }, + "value": { + "annotationValue": "Library design description 8", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/452" + }, + "value": { + "annotationValue": "SYNTHETIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/453" + }, + "value": { + "annotationValue": "FL-cDNA", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/454" + }, + "value": { + "annotationValue": "repeat fractionation", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/455" + }, + "value": { + "annotationValue": "PAIRED", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/456" + }, + "value": { + "annotationValue": "456", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/457" + }, + "value": { + "annotationValue": "Themo Fisher Scientific", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/458" + }, + "value": { + "annotationValue": "AB 3730 Genetic Analyzer", + "termSource": "", + 
"termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/sample_collection/157" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/157" + }, + "inputs": [ + { + "@id": "#sample/145" + } + ], + "outputs": [ + { + "@id": "#sample/157" + } + ] + }, + { + "@id": "#process/nucleic_acid_sequencing/158", + "name": "", + "executesProtocol": { + "@id": "#protocol/_30" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/library_construction/158" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/154" + } + ], + "outputs": [ + { + "@id": "#sample/158" + } + ] + }, + { + "@id": "#process/nucleic_acid_sequencing/159", + "name": "", + "executesProtocol": { + "@id": "#protocol/_30" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/library_construction/159" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/155" + } + ], + "outputs": [ + { + "@id": "#sample/159" + } + ] + }, + { + "@id": "#process/nucleic_acid_sequencing/160", + "name": "", + "executesProtocol": { + "@id": "#protocol/_30" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/library_construction/160" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/156" + } + ], + "outputs": [ + { + "@id": "#sample/160" + } + ] + }, + { + "@id": "#process/nucleic_acid_sequencing/161", + "name": "", + "executesProtocol": { + "@id": "#protocol/_30" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/library_construction/161" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/157" + } + ], + "outputs": [ + { + "@id": "#sample/161" + } + ] + } + ], + "dataFiles": [ + { + "@id": "#data_file/158", + "name": 
"Raw Data File 5", + "type": "Raw Data File", + "comments": [ + { + "name": "file_name", + "value": "data file 5.bam" + }, + { + "name": "file_type", + "value": "bam" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "accession", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + } + ] + }, + { + "@id": "#data_file/159", + "name": "Raw Data File 6", + "type": "Raw Data File", + "comments": [ + { + "name": "file_name", + "value": "data file 6.cram" + }, + { + "name": "file_type", + "value": "cram" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "accession", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + } + ] + }, + { + "@id": "#data_file/160", + "name": "Raw Data File 7", + "type": "Raw Data File", + "comments": [ + { + "name": "file_name", + "value": "data file 7.fastq" + }, + { + "name": "file_type", + "value": "fastq" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "accession", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + } + ] + }, + { + "@id": "#data_file/161", + "name": "Raw Data File 8", + "type": "Raw Data File", + "comments": [ + { + "name": "file_name", + "value": "data file 8.sff" + }, + { + "name": "file_type", + "value": "sff" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "accession", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + } + ] + } + ], + "unitCategories": [] + } + ], + "factors": [], + "unitCategories": [] + }, + { + "identifier": "", + "title": "ENA Study 2", + "description": "ENA Study 2 - description", + "submissionDate": "", + "publicReleaseDate": "", + "filename": "ENA Study 2.txt", + "comments": [ + { + "@id": "#study_comment/20_11_49", + "name": "ena_sample_alias_prefix", + "value": 
"https://datahub.elixir-belgium.org/samples/" + }, + { + "@id": "#study_comment/20_259d4380-4bbf-013c-6734-7a163e608de1", + "name": "SEEK Study ID", + "value": "20" + }, + { + "@id": "#study_comment/20_259d4a20-4bbf-013c-6735-7a163e608de1", + "name": "SEEK creation date", + "value": "2023-10-09T14:34:40Z" + } + ], + "publications": [ + { + "pubMedID": 7, + "doi": null, + "status": { + "termAccession": "", + "termSource": "", + "annotationValue": "" + }, + "title": "Test Publication 3", + "author_list": "", + "comments": [ + { + "@id": "#publication_comment/3_31", + "name": "linked_assays", + "value": "[{\"id\":31,\"title\":\"ENA Library construction - Study 2 - Stream 1\"}]" + }, + { + "@id": "#publication_comment/3_", + "name": "linked_studies", + "value": "[]" + } + ] + } + ], + "people": [ + { + "@id": "#people/4", + "lastName": "De Pelseneer", + "firstName": "Kevin", + "midInitials": "", + "email": "kevin.depelseneer@psb.ugent.be", + "phone": "", + "fax": "", + "address": "", + "affiliation": "", + "roles": [ + { + "termAccession": "", + "termSource": "", + "annotationValue": "" + } + ], + "comments": [ + { + "@id": "", + "value": "", + "name": "" + } + ] + } + ], + "studyDesignDescriptors": [], + "characteristicCategories": [ + { + "@id": "#characteristic_category/geographic_location_(country_and/or_sea)_472", + "characteristicType": { + "annotationValue": "geographic location (country and/or sea)", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/taxon_id_473", + "characteristicType": { + "annotationValue": "taxon_id", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/title_477", + "characteristicType": { + "annotationValue": "title", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/sample_description_478", + "characteristicType": { + "annotationValue": "sample_description", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": 
"#characteristic_category/collection_date_479", + "characteristicType": { + "annotationValue": "collection date", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/accession_480", + "characteristicType": { + "annotationValue": "accession", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/submission_date_481", + "characteristicType": { + "annotationValue": "submission date", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/status_482", + "characteristicType": { + "annotationValue": "status", + "termAccession": "", + "termSource": "" + } + } + ], + "materials": { + "sources": [ + { + "@id": "#source/162", + "name": "Source 3", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/geographic_location_(country_and/or_sea)_472" + }, + "value": { + "annotationValue": "Afghanistan", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/taxon_id_473" + }, + "value": { + "annotationValue": "9876", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#source/163", + "name": "Source 4", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/geographic_location_(country_and/or_sea)_472" + }, + "value": { + "annotationValue": "Albania", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/taxon_id_473" + }, + "value": { + "annotationValue": "8765", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + } + ], + "samples": [ + { + "@id": "#sample/164", + "name": "Sample 5", + 
"derivesFrom": [ + { + "@id": "#source/162" + } + ], + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_477" + }, + "value": { + "annotationValue": "Sample title 5", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/sample_description_478" + }, + "value": { + "annotationValue": "Sample description 5", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/collection_date_479" + }, + "value": { + "annotationValue": "2019", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_480" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_481" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_482" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "factorValues": [ + { + "category": { + "@id": "" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/165", + "name": "Sample 6", + "derivesFrom": [ + { + "@id": "#source/163" + } + ], + "characteristics": [ + { + "category": { + "@id": 
"#characteristic_category/title_477" + }, + "value": { + "annotationValue": "Sample title 6", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/sample_description_478" + }, + "value": { + "annotationValue": "Sample description 6", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/collection_date_479" + }, + "value": { + "annotationValue": "2018", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_480" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_481" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_482" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "factorValues": [ + { + "category": { + "@id": "" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + } + ] + }, + "protocols": [ + { + "@id": "#protocol/3_20", + "name": "sample collection", + "protocolType": { + "annotationValue": "sample collection", + "termAccession": "", + "termSource": "" + }, + "description": "This SOP describes a recipe how to make chocolate chip cookies.", + "uri": "", + 
"version": "", + "parameters": [], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + }, + { + "@id": "#protocol/_31", + "name": "library construction", + "protocolType": { + "annotationValue": "library construction", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [ + { + "@id": "#parameter/485", + "parameterName": { + "annotationValue": "library_construction_protocol", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/487", + "parameterName": { + "annotationValue": "design_description", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/489", + "parameterName": { + "annotationValue": "library_source", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/490", + "parameterName": { + "annotationValue": "library_strategy", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/491", + "parameterName": { + "annotationValue": "library_selection", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/492", + "parameterName": { + "annotationValue": "library_layout", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/493", + "parameterName": { + "annotationValue": "insert_size", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/494", + "parameterName": { + "annotationValue": "platform", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/495", + "parameterName": { + "annotationValue": "instrument_model", + "termAccession": "", + "termSource": "" + } + } + ], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + }, + { + "@id": "#protocol/_32", + "name": "nucleic acid sequencing", + "protocolType": { + "annotationValue": "nucleic 
acid sequencing", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + } + ], + "processSequence": [ + { + "@id": "#process/sample_collection/164", + "name": "", + "executesProtocol": { + "@id": "#protocol/3_20" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [ + { + "@id": "#source/162" + } + ], + "outputs": [ + { + "@id": "#sample/164" + } + ] + }, + { + "@id": "#process/sample_collection/165", + "name": "", + "executesProtocol": { + "@id": "#protocol/3_20" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [ + { + "@id": "#source/163" + } + ], + "outputs": [ + { + "@id": "#sample/165" + } + ] + } + ], + "assays": [ + { + "@id": "#assay/31_32", + "filename": "a_ena_stream_3.txt", + "measurementType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "technologyType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "comments": [ + { + "@id": "#assay_comment/20_31_32_12_50", + "name": "ena_study_title", + "value": "Ena Study 3" + }, + { + "@id": "#assay_comment/20_31_32_12_51", + "name": "study_type", + "value": "Other" + }, + { + "@id": "#assay_comment/20_31_32_12_52", + "name": "new_study_type", + "value": "My other special study type" + }, + { + "@id": "#assay_comment/20_31_32_12_53", + "name": "ena_study_abstract", + "value": "This is Ena Study 3." 
+ }, + { + "@id": "#assay_comment/20_31_32_12_54", + "name": "assay_stream", + "value": "Ena stream 3" + }, + { + "@id": "#assay_comment/20_31_32_12_55", + "name": "ena_study_alias_prefix", + "value": "https://datahub.elixir-belgium.org/studies/" + }, + { + "@id": "#assay_comment/20_31_32_12_56", + "name": "ena_experiment_alias_prefix", + "value": "https://datahub.elixir-belgium.org/samples/" + }, + { + "@id": "#assay_comment/20_31_32_12_57", + "name": "ena_run_alias_prefix", + "value": "https://datahub.elixir-belgium.org/samples/" + }, + { + "@id": "#assay_comment/31_32", + "name": "linked_assays", + "value": "[{\"id\":31,\"title\":\"ENA Library construction - Study 2 - Stream 1\"},{\"id\":32,\"title\":\"ENA DNA Sequencing - Study 2 - Stream 1\"}]" + } + ], + "technologyPlatform": "", + "characteristicCategories": [ + { + "@id": "#characteristic_category/title_486", + "characteristicType": { + "annotationValue": "title", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/accession_496", + "characteristicType": { + "annotationValue": "accession", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/submission_date_497", + "characteristicType": { + "annotationValue": "submission date", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/status_498", + "characteristicType": { + "annotationValue": "status", + "termAccession": "", + "termSource": "" + } + } + ], + "materials": { + "samples": [], + "otherMaterials": [ + { + "@id": "#other_material/166", + "name": "Library 9", + "type": "library_name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_486" + }, + "value": { + "annotationValue": "Library title 9", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_496" + }, + "value": { 
+ "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_497" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_498" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/164" + } + ] + }, + { + "@id": "#other_material/167", + "name": "Library 10", + "type": "library_name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_486" + }, + "value": { + "annotationValue": "Library title 10", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_496" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_497" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_498" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/165" + } + ] + } + ] + }, + "processSequence": [ + { + "@id": "#process/library_construction/166", + "name": "", + "executesProtocol": { + "@id": "#protocol/_31" + 
}, + "parameterValues": [ + { + "category": { + "@id": "#parameter/485" + }, + "value": { + "annotationValue": "My library construction protocol 9", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/487" + }, + "value": { + "annotationValue": "Library design description 9", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/489" + }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/490" + }, + "value": { + "annotationValue": "EST", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/491" + }, + "value": { + "annotationValue": "MNase", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/492" + }, + "value": { + "annotationValue": "SINGLE", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/493" + }, + "value": { + "annotationValue": "987", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/494" + }, + "value": { + "annotationValue": "Themo Fisher Scientific", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/495" + }, + "value": { + "annotationValue": "AB 3500xL Genetic Analyzer", + 
"termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/sample_collection/166" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/166" + }, + "inputs": [ + { + "@id": "#sample/164" + } + ], + "outputs": [ + { + "@id": "#sample/166" + } + ] + }, + { + "@id": "#process/library_construction/167", + "name": "", + "executesProtocol": { + "@id": "#protocol/_31" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/485" + }, + "value": { + "annotationValue": "My library construction protocol 10", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/487" + }, + "value": { + "annotationValue": "Library design description 10", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/489" + }, + "value": { + "annotationValue": "SYNTHETIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/490" + }, + "value": { + "annotationValue": "Hi-C", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/491" + }, + "value": { + "annotationValue": "Oligo-dT", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/492" + }, + "value": { + "annotationValue": "PAIRED", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/493" + }, + 
"value": { + "annotationValue": "876", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/494" + }, + "value": { + "annotationValue": "LS454", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/495" + }, + "value": { + "annotationValue": "454 GS FLX", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/sample_collection/167" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/167" + }, + "inputs": [ + { + "@id": "#sample/165" + } + ], + "outputs": [ + { + "@id": "#sample/167" + } + ] + }, + { + "@id": "#process/nucleic_acid_sequencing/168", + "name": "", + "executesProtocol": { + "@id": "#protocol/_32" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/library_construction/168" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/166" + } + ], + "outputs": [ + { + "@id": "#sample/168" + } + ] + }, + { + "@id": "#process/nucleic_acid_sequencing/169", + "name": "", + "executesProtocol": { + "@id": "#protocol/_32" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/library_construction/169" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/167" + } + ], + "outputs": [ + { + "@id": "#sample/169" + } + ] + } + ], + "dataFiles": [ + { + "@id": "#data_file/168", + "name": "Raw Data File 9", + "type": "Raw Data File", + "comments": [ + { + "name": "file_name", + "value": "data file 9.sff" + }, + { + "name": "file_type", + "value": "sff" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "accession", + "value": "" + 
}, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + } + ] + }, + { + "@id": "#data_file/169", + "name": "Raw Data File 10", + "type": "Raw Data File", + "comments": [ + { + "name": "file_name", + "value": "data file 10.fastq" + }, + { + "name": "file_type", + "value": "fastq" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "accession", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + } + ] + } + ], + "unitCategories": [] + } + ], + "factors": [], + "unitCategories": [] + } + ] +} \ No newline at end of file diff --git a/tests/test_ena_objects.py b/tests/test_ena_objects.py index e1456c0..6f5a917 100644 --- a/tests/test_ena_objects.py +++ b/tests/test_ena_objects.py @@ -1,22 +1,38 @@ import os +import jsonschema import pytest import json -from ena_objects.ena_study import EnaStudy, EnaSample, EnaExperiment, EnaRun -test_isa_jsonfile = open("tests/test_data/isa_json_test_investigation.json") +from rich import print_json +from ena_objects.characteristic import IsaBase +from ena_objects.ena_submission import EnaSubmission, EnaSample +from ena_objects.ena_std_lib import fetch_assay_streams, study_publication_ids +from ena_objects.ena_study import EnaStudy +test_isa_jsonfile = open( + "tests/test_data/multi_study_multi_assay_stream_investigation.json" +) test_isa_json = json.load(test_isa_jsonfile) +class TestEnaSubmission: + """Test class for Ena Submission objects""" + + def test_json_schema_validation(self): + bad_investigation_isa_json_file = open( + "tests/test_data/bad_investigation_isa_json.json" + ) + bad_investigation_isa_json = json.load(bad_investigation_isa_json_file) + + with pytest.raises(jsonschema.ValidationError): + IsaBase.validate_json( + bad_investigation_isa_json, EnaSubmission.investigation_schema + ) + + class TestEnaStudy: """Test class for Ena Study objects""" - bad_dict = { - "title": "My Title", - "study_description": 
"Should 'description'", - "publications": None, - } - test_study_dict = { "alias": "https://datahub.elixir-belgium.org/studies/2", "title": "ENA Upload Study", @@ -26,17 +42,20 @@ class TestEnaStudy: "pubmed_id": [1], } - def test_should_raise_key_error(self): - with pytest.raises( - KeyError, match="description was not found in the provided ISA JSON." - ): - EnaStudy.from_isa_json(self.bad_dict) - def test_ena_study_creation(self): - studies = EnaStudy.from_isa_json(test_isa_json) + studies = [study for study in test_isa_json["studies"]] + assay_streams = [] + ena_studies = [] + assert len(studies) == 2 + + for study in studies: + pubmed_ids = study_publication_ids(study["publications"]) + print(f"Study pubmed ids: {pubmed_ids}") + for assay_stream in fetch_assay_streams(study): + assay_streams.append(assay_stream) + ena_studies.append(EnaStudy.from_assay_stream(assay_stream, pubmed_ids)) - assert len(studies) == 1 - assert studies[0].to_dict() == self.test_study_dict + assert len(assay_streams) == 3 class TestEnaSample: @@ -44,7 +63,7 @@ class TestEnaSample: def test_sample_creation(self): samples = EnaSample.from_study_dict(self.study_dict) - assert len(samples) == 6 + assert len(samples) == 5 class TestEnaExperiment: From 650a4b160da50f8bfed2cb2798221a7c1d66a310 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 13 Oct 2023 16:53:46 +0200 Subject: [PATCH 40/62] Replace script by jupyter notebook --- example_read_isa_json.ipynb | 918 ++++++++++++++++++++++++++++++++++++ example_read_isa_json.py | 32 -- 2 files changed, 918 insertions(+), 32 deletions(-) create mode 100644 example_read_isa_json.ipynb delete mode 100644 example_read_isa_json.py diff --git a/example_read_isa_json.ipynb b/example_read_isa_json.ipynb new file mode 100644 index 0000000..daaf65c --- /dev/null +++ b/example_read_isa_json.ipynb @@ -0,0 +1,918 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example Script for parsing an ISA JSON" + ] + }, + 
{ + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import statements" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import os\n", + "from ena_objects.ena_submission import EnaSubmission\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Reading a JSON file" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# Read json file\n", + "isa_json_file = open(\n", + " \"tests/test_data/multi_study_multi_assay_stream_investigation.json\"\n", + ")\n", + "isa_json = json.load(isa_json_file)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setting some extra parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "# Change this to 'True' if you want to export the resulting DataFrames to an xlsx.\n", + "export_to_excel = False\n", + "outputfolder = \"./output_folder/\"\n", + "\n", + "required_assays = [\n", + " {\"assay_stream\": \"Ena stream 1\"},\n", + " {\"ena_study_title\": \"Ena Study 2\"},\n", + " {\"ena_study_title\": \"Ena Study 3\"},\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Parsing" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "submission = EnaSubmission.from_isa_json(isa_json, required_assays)\n", + "submission_dfs = submission.generate_dataframes()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Output" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dataframe study:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
aliastitlestudy_typestudy_abstractnew_study_typepubmed_id
0https://datahub.elixir-belgium.org/studies/27_28Ena Study 1Whole Genome SequencingThis is Ena Study 1.None5,6
1https://datahub.elixir-belgium.org/studies/29_30Ena Study 2OtherThis is Ena Study 2.My special study type5,6
2https://datahub.elixir-belgium.org/studies/31_32Ena Study 3OtherThis is Ena Study 3.My other special study type7
\n", + "
" + ], + "text/plain": [ + " alias title \\\n", + "0 https://datahub.elixir-belgium.org/studies/27_28 Ena Study 1 \n", + "1 https://datahub.elixir-belgium.org/studies/29_30 Ena Study 2 \n", + "2 https://datahub.elixir-belgium.org/studies/31_32 Ena Study 3 \n", + "\n", + " study_type study_abstract new_study_type \\\n", + "0 Whole Genome Sequencing This is Ena Study 1. None \n", + "1 Other This is Ena Study 2. My special study type \n", + "2 Other This is Ena Study 3. My other special study type \n", + "\n", + " pubmed_id \n", + "0 5,6 \n", + "1 5,6 \n", + "2 7 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dataframe samples:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
aliastitlesample_descriptioncollection dateaccessionsubmission datestatusgeographic location (country and/or sea)taxon_id
0https://datahub.elixir-belgium.org/samples/142Sample title 1Sample description 12023Afghanistan1234
1https://datahub.elixir-belgium.org/samples/143Sample title 2Sample description 22022Afghanistan1234
2https://datahub.elixir-belgium.org/samples/144Sample title 3Sample description 32021Albania2345
3https://datahub.elixir-belgium.org/samples/145Sample title 4Sample description 42020Albania2345
4https://datahub.elixir-belgium.org/samples/164Sample title 5Sample description 52019Afghanistan9876
5https://datahub.elixir-belgium.org/samples/165Sample title 6Sample description 62018Albania8765
\n", + "
" + ], + "text/plain": [ + " alias title \\\n", + "0 https://datahub.elixir-belgium.org/samples/142 Sample title 1 \n", + "1 https://datahub.elixir-belgium.org/samples/143 Sample title 2 \n", + "2 https://datahub.elixir-belgium.org/samples/144 Sample title 3 \n", + "3 https://datahub.elixir-belgium.org/samples/145 Sample title 4 \n", + "4 https://datahub.elixir-belgium.org/samples/164 Sample title 5 \n", + "5 https://datahub.elixir-belgium.org/samples/165 Sample title 6 \n", + "\n", + " sample_description collection date accession submission date status \\\n", + "0 Sample description 1 2023 \n", + "1 Sample description 2 2022 \n", + "2 Sample description 3 2021 \n", + "3 Sample description 4 2020 \n", + "4 Sample description 5 2019 \n", + "5 Sample description 6 2018 \n", + "\n", + " geographic location (country and/or sea) taxon_id \n", + "0 Afghanistan 1234 \n", + "1 Afghanistan 1234 \n", + "2 Albania 2345 \n", + "3 Albania 2345 \n", + "4 Afghanistan 9876 \n", + "5 Albania 8765 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dataframe experiments:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + "
aliasstudy_aliassample_aliaslibrary_nametitleaccessionsubmission datestatuslibrary_construction_protocoldesign_descriptionlibrary_sourcelibrary_strategylibrary_selectionlibrary_layoutinsert_sizeplatforminstrument_model
0https://datahub.elixir-belgium.org/samples/146https://datahub.elixir-belgium.org/studies/27_28https://datahub.elixir-belgium.org/samples/142Library 1Library title 1My special protocol 1Library description 1GENOMICWGSRANDOMSINGLE123LS454454 GS
1https://datahub.elixir-belgium.org/samples/147https://datahub.elixir-belgium.org/studies/27_28https://datahub.elixir-belgium.org/samples/143Library 2Library title 2My special protocol 2Library description 2GENOMIC SINGLE CELLWGAPCRPAIRED234IlluminaIllumina Genome Analyzer
2https://datahub.elixir-belgium.org/samples/148https://datahub.elixir-belgium.org/studies/27_28https://datahub.elixir-belgium.org/samples/144Library 3Library title 3My special protocol 3Library description 3TRANSCRIPTOMICWXSRANDOM PCRSINGLE345PacBioPacBio RS
3https://datahub.elixir-belgium.org/samples/149https://datahub.elixir-belgium.org/studies/27_28https://datahub.elixir-belgium.org/samples/145Library 4Library title 4My special protocol 4Library description 4TRANSCRIPTOMIC SINGLE CELLRNA-SeqRT-PCRPAIRED456Themo Fisher ScientificAB 3730xL Genetic Analyzer
4https://datahub.elixir-belgium.org/samples/154https://datahub.elixir-belgium.org/studies/29_30https://datahub.elixir-belgium.org/samples/142Library 5Library title 5My library construction protocol 5Library design description 5GENOMICssRNA-seqHMPRSINGLE123LS454454 GS 20
5https://datahub.elixir-belgium.org/samples/155https://datahub.elixir-belgium.org/studies/29_30https://datahub.elixir-belgium.org/samples/143Library 6Library title 6My library construction protocol 6Library design description 6GENOMIC SINGLE CELLmiRNA-SeqMFPAIRED234IlluminaIllumina Genome Analyzer II
6https://datahub.elixir-belgium.org/samples/156https://datahub.elixir-belgium.org/studies/29_30https://datahub.elixir-belgium.org/samples/144Library 7Library title 7My library construction protocol 7Library design description 7TRANSCRIPTOMICncRNA-Seqsize fractionationSINGLE345PacBioPacBio RS II
7https://datahub.elixir-belgium.org/samples/157https://datahub.elixir-belgium.org/studies/29_30https://datahub.elixir-belgium.org/samples/145Library 8Library title 8My library construction protocol 8Library design description 8SYNTHETICFL-cDNArepeat fractionationPAIRED456Themo Fisher ScientificAB 3730 Genetic Analyzer
8https://datahub.elixir-belgium.org/samples/166https://datahub.elixir-belgium.org/studies/31_32https://datahub.elixir-belgium.org/samples/164Library 9Library title 9My library construction protocol 9Library design description 9GENOMICESTMNaseSINGLE987Themo Fisher ScientificAB 3500xL Genetic Analyzer
9https://datahub.elixir-belgium.org/samples/167https://datahub.elixir-belgium.org/studies/31_32https://datahub.elixir-belgium.org/samples/165Library 10Library title 10My library construction protocol 10Library design description 10SYNTHETICHi-COligo-dTPAIRED876LS454454 GS FLX
\n", + "
" + ], + "text/plain": [ + " alias \\\n", + "0 https://datahub.elixir-belgium.org/samples/146 \n", + "1 https://datahub.elixir-belgium.org/samples/147 \n", + "2 https://datahub.elixir-belgium.org/samples/148 \n", + "3 https://datahub.elixir-belgium.org/samples/149 \n", + "4 https://datahub.elixir-belgium.org/samples/154 \n", + "5 https://datahub.elixir-belgium.org/samples/155 \n", + "6 https://datahub.elixir-belgium.org/samples/156 \n", + "7 https://datahub.elixir-belgium.org/samples/157 \n", + "8 https://datahub.elixir-belgium.org/samples/166 \n", + "9 https://datahub.elixir-belgium.org/samples/167 \n", + "\n", + " study_alias \\\n", + "0 https://datahub.elixir-belgium.org/studies/27_28 \n", + "1 https://datahub.elixir-belgium.org/studies/27_28 \n", + "2 https://datahub.elixir-belgium.org/studies/27_28 \n", + "3 https://datahub.elixir-belgium.org/studies/27_28 \n", + "4 https://datahub.elixir-belgium.org/studies/29_30 \n", + "5 https://datahub.elixir-belgium.org/studies/29_30 \n", + "6 https://datahub.elixir-belgium.org/studies/29_30 \n", + "7 https://datahub.elixir-belgium.org/studies/29_30 \n", + "8 https://datahub.elixir-belgium.org/studies/31_32 \n", + "9 https://datahub.elixir-belgium.org/studies/31_32 \n", + "\n", + " sample_alias library_name \\\n", + "0 https://datahub.elixir-belgium.org/samples/142 Library 1 \n", + "1 https://datahub.elixir-belgium.org/samples/143 Library 2 \n", + "2 https://datahub.elixir-belgium.org/samples/144 Library 3 \n", + "3 https://datahub.elixir-belgium.org/samples/145 Library 4 \n", + "4 https://datahub.elixir-belgium.org/samples/142 Library 5 \n", + "5 https://datahub.elixir-belgium.org/samples/143 Library 6 \n", + "6 https://datahub.elixir-belgium.org/samples/144 Library 7 \n", + "7 https://datahub.elixir-belgium.org/samples/145 Library 8 \n", + "8 https://datahub.elixir-belgium.org/samples/164 Library 9 \n", + "9 https://datahub.elixir-belgium.org/samples/165 Library 10 \n", + "\n", + " title accession submission date status 
\\\n", + "0 Library title 1 \n", + "1 Library title 2 \n", + "2 Library title 3 \n", + "3 Library title 4 \n", + "4 Library title 5 \n", + "5 Library title 6 \n", + "6 Library title 7 \n", + "7 Library title 8 \n", + "8 Library title 9 \n", + "9 Library title 10 \n", + "\n", + " library_construction_protocol design_description \\\n", + "0 My special protocol 1 Library description 1 \n", + "1 My special protocol 2 Library description 2 \n", + "2 My special protocol 3 Library description 3 \n", + "3 My special protocol 4 Library description 4 \n", + "4 My library construction protocol 5 Library design description 5 \n", + "5 My library construction protocol 6 Library design description 6 \n", + "6 My library construction protocol 7 Library design description 7 \n", + "7 My library construction protocol 8 Library design description 8 \n", + "8 My library construction protocol 9 Library design description 9 \n", + "9 My library construction protocol 10 Library design description 10 \n", + "\n", + " library_source library_strategy library_selection \\\n", + "0 GENOMIC WGS RANDOM \n", + "1 GENOMIC SINGLE CELL WGA PCR \n", + "2 TRANSCRIPTOMIC WXS RANDOM PCR \n", + "3 TRANSCRIPTOMIC SINGLE CELL RNA-Seq RT-PCR \n", + "4 GENOMIC ssRNA-seq HMPR \n", + "5 GENOMIC SINGLE CELL miRNA-Seq MF \n", + "6 TRANSCRIPTOMIC ncRNA-Seq size fractionation \n", + "7 SYNTHETIC FL-cDNA repeat fractionation \n", + "8 GENOMIC EST MNase \n", + "9 SYNTHETIC Hi-C Oligo-dT \n", + "\n", + " library_layout insert_size platform \\\n", + "0 SINGLE 123 LS454 \n", + "1 PAIRED 234 Illumina \n", + "2 SINGLE 345 PacBio \n", + "3 PAIRED 456 Themo Fisher Scientific \n", + "4 SINGLE 123 LS454 \n", + "5 PAIRED 234 Illumina \n", + "6 SINGLE 345 PacBio \n", + "7 PAIRED 456 Themo Fisher Scientific \n", + "8 SINGLE 987 Themo Fisher Scientific \n", + "9 PAIRED 876 LS454 \n", + "\n", + " instrument_model \n", + "0 454 GS \n", + "1 Illumina Genome Analyzer \n", + "2 PacBio RS \n", + "3 AB 3730xL Genetic Analyzer \n", + 
"4 454 GS 20 \n", + "5 Illumina Genome Analyzer II \n", + "6 PacBio RS II \n", + "7 AB 3730 Genetic Analyzer \n", + "8 AB 3500xL Genetic Analyzer \n", + "9 454 GS FLX " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dataframe runs:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
aliasexperiment_aliasfile_namefile_typefile checksumaccessionsubmission datestatus
0https://datahub.elixir-belgium.org/samples/150ena_run_alias_prefix146data_file_1.bambam
1https://datahub.elixir-belgium.org/samples/151ena_run_alias_prefix147data_file_2.cramcram
2https://datahub.elixir-belgium.org/samples/152ena_run_alias_prefix148data_file_3.fastqfastq
3https://datahub.elixir-belgium.org/samples/153ena_run_alias_prefix149data_file_4.sffsff
4https://datahub.elixir-belgium.org/samples/158ena_run_alias_prefix154data file 5.bambam
5https://datahub.elixir-belgium.org/samples/159ena_run_alias_prefix155data file 6.cramcram
6https://datahub.elixir-belgium.org/samples/160ena_run_alias_prefix156data file 7.fastqfastq
7https://datahub.elixir-belgium.org/samples/161ena_run_alias_prefix157data file 8.sffsff
8https://datahub.elixir-belgium.org/samples/168ena_run_alias_prefix166data file 9.sffsff
9https://datahub.elixir-belgium.org/samples/169ena_run_alias_prefix167data file 10.fastqfastq
\n", + "
" + ], + "text/plain": [ + " alias experiment_alias \\\n", + "0 https://datahub.elixir-belgium.org/samples/150 ena_run_alias_prefix146 \n", + "1 https://datahub.elixir-belgium.org/samples/151 ena_run_alias_prefix147 \n", + "2 https://datahub.elixir-belgium.org/samples/152 ena_run_alias_prefix148 \n", + "3 https://datahub.elixir-belgium.org/samples/153 ena_run_alias_prefix149 \n", + "4 https://datahub.elixir-belgium.org/samples/158 ena_run_alias_prefix154 \n", + "5 https://datahub.elixir-belgium.org/samples/159 ena_run_alias_prefix155 \n", + "6 https://datahub.elixir-belgium.org/samples/160 ena_run_alias_prefix156 \n", + "7 https://datahub.elixir-belgium.org/samples/161 ena_run_alias_prefix157 \n", + "8 https://datahub.elixir-belgium.org/samples/168 ena_run_alias_prefix166 \n", + "9 https://datahub.elixir-belgium.org/samples/169 ena_run_alias_prefix167 \n", + "\n", + " file_name file_type file checksum accession submission date status \n", + "0 data_file_1.bam bam \n", + "1 data_file_2.cram cram \n", + "2 data_file_3.fastq fastq \n", + "3 data_file_4.sff sff \n", + "4 data file 5.bam bam \n", + "5 data file 6.cram cram \n", + "6 data file 7.fastq fastq \n", + "7 data file 8.sff sff \n", + "8 data file 9.sff sff \n", + "9 data file 10.fastq fastq " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Done!\n" + ] + } + ], + "source": [ + "\n", + "if (not os.path.exists(outputfolder)) and export_to_excel:\n", + " os.makedirs(outputfolder)\n", + "\n", + "for k, df in submission_dfs.items():\n", + " print(f\"Dataframe {k}:\")\n", + " display(df)\n", + " if export_to_excel:\n", + " df.to_excel(f\"{outputfolder}{k}.xlsx\")\n", + "\n", + "print(\"Done!\")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + 
"mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/example_read_isa_json.py b/example_read_isa_json.py deleted file mode 100644 index dbc5854..0000000 --- a/example_read_isa_json.py +++ /dev/null @@ -1,32 +0,0 @@ -import json -import os -from ena_objects.ena_submission import EnaSubmission - -# Read json file -isa_json_file = open( - "tests/test_data/multi_study_multi_assay_stream_investigation.json" -) -isa_json = json.load(isa_json_file) - -# Change this to 'True' if you want to export the resulting DataFrames to an xlsx. -export_to_excel = True -outputfolder = "./output_folder/" - -required_assays = [ - {"assay_stream": "Ena stream 1"}, - {"ena_study_title": "Ena Study 2"}, -] - -submission = EnaSubmission.from_isa_json(isa_json, required_assays) -submission_dfs = submission.generate_dataframes() - -if (not os.path.exists(outputfolder)) and export_to_excel: - os.makedirs(outputfolder) - -for k, df in submission_dfs.items(): - print(f"Dataframe {k}:") - print(df) - if export_to_excel: - df.to_excel(f"{outputfolder}{k}.xlsx") - -print("Done!") From 62dd1df122f8a22e9b5195e7f45e7d7e73925aeb Mon Sep 17 00:00:00 2001 From: Bert Droesbeke <44875756+bedroesb@users.noreply.github.com> Date: Fri, 20 Oct 2023 13:28:01 +0200 Subject: [PATCH 41/62] Restructure isa json support (#1) * start with nan fix * instrument_model * update templates * updating xml templates * version bump * not everything mandatory * some improvements * unicode problem * encode with utf-8 * add updated templates * update minimal example * update templates * update readme * move structure * update more paths * start with integration * get example working --- README.md | 15 ++--- ena_upload/_version.py | 2 +- ena_upload/ena_upload.py | 48 +++++++++++--- .../json_parsing}/__init__.py | 0 .../json_parsing}/characteristic.py | 2 +- 
.../json_parsing}/ena_experiment.py | 8 +-- .../json_parsing}/ena_run.py | 4 +- .../json_parsing}/ena_sample.py | 4 +- .../json_parsing}/ena_std_lib.py | 0 .../json_parsing}/ena_study.py | 4 +- .../json_parsing}/ena_submission.py | 18 +++--- .../json_schemas/assay_schema.json | 0 .../json_schemas/comment_schema.json | 0 .../json_schemas/data_schema.json | 0 .../json_schemas/factor_schema.json | 0 .../json_schemas/factor_value_schema.json | 0 .../json_schemas/investigation_schema.json | 0 .../material_attribute_schema.json | 0 .../material_attribute_value_schema.json | 0 .../json_schemas/material_schema.json | 0 .../ontology_annotation_schema.json | 0 .../ontology_source_reference_schema.json | 0 .../json_schemas/organization_schema.json | 0 .../json_schemas/person_schema.json | 0 .../process_parameter_value_schema.json | 0 .../json_schemas/process_schema.json | 0 .../protocol_parameter_schema.json | 0 .../json_schemas/protocol_schema.json | 0 .../json_schemas/publication_schema.json | 0 .../json_schemas/sample_schema.json | 0 .../json_schemas/source_schema.json | 0 .../json_schemas/study_schema.json | 0 .../json_parsing}/other_material.py | 2 +- .../templates/ENA_template_experiments.xml | 31 ++++++++- ena_upload/templates/ENA_template_runs.xml | 10 ++- .../ENA_template_samples_ERC000011.xml | 19 +++++- .../ENA_template_samples_ERC000012.xml | 33 +++++++++- .../ENA_template_samples_ERC000013.xml | 31 ++++++++- .../ENA_template_samples_ERC000014.xml | 31 ++++++++- .../ENA_template_samples_ERC000015.xml | 31 ++++++++- .../ENA_template_samples_ERC000016.xml | 31 ++++++++- .../ENA_template_samples_ERC000017.xml | 31 ++++++++- .../ENA_template_samples_ERC000018.xml | 31 ++++++++- .../ENA_template_samples_ERC000019.xml | 35 ++++++++++- .../ENA_template_samples_ERC000020.xml | 31 ++++++++- .../ENA_template_samples_ERC000021.xml | 35 ++++++++++- .../ENA_template_samples_ERC000022.xml | 35 ++++++++++- .../ENA_template_samples_ERC000023.xml | 31 ++++++++- 
.../ENA_template_samples_ERC000024.xml | 33 +++++++++- .../ENA_template_samples_ERC000025.xml | 31 ++++++++- .../ENA_template_samples_ERC000027.xml | 49 ++++++++++++++- .../ENA_template_samples_ERC000028.xml | 27 +++++++- .../ENA_template_samples_ERC000029.xml | 35 ++++++++++- .../ENA_template_samples_ERC000030.xml | 49 ++++++++++++++- .../ENA_template_samples_ERC000031.xml | 63 ++++++++++++++++++- .../ENA_template_samples_ERC000032.xml | 41 +++++++++++- .../ENA_template_samples_ERC000033.xml | 35 ++++++++++- .../ENA_template_samples_ERC000034.xml | 27 +++++++- .../ENA_template_samples_ERC000035.xml | 19 +++++- .../ENA_template_samples_ERC000036.xml | 23 ++++++- .../ENA_template_samples_ERC000037.xml | 31 ++++++++- .../ENA_template_samples_ERC000038.xml | 53 +++++++++++++++- .../ENA_template_samples_ERC000039.xml | 27 +++++++- .../ENA_template_samples_ERC000040.xml | 37 ++++++++++- .../ENA_template_samples_ERC000041.xml | 21 ++++++- .../ENA_template_samples_ERC000043.xml | 21 ++++++- .../ENA_template_samples_ERC000044.xml | 25 +++++++- .../ENA_template_samples_ERC000045.xml | 23 ++++++- .../ENA_template_samples_ERC000047.xml | 51 ++++++++++++++- .../ENA_template_samples_ERC000048.xml | 43 ++++++++++++- .../ENA_template_samples_ERC000049.xml | 53 +++++++++++++++- .../ENA_template_samples_ERC000050.xml | 47 +++++++++++++- .../ENA_template_samples_ERC000051.xml | 41 +++++++++++- .../ENA_template_samples_ERC000052.xml | 39 +++++++++++- .../ENA_template_samples_ERC000053.xml | 35 ++++++++++- ena_upload/templates/ENA_template_studies.xml | 18 ++++-- .../jinja_templates/ENA_template_samples.xml | 29 +++++---- example_read_isa_json.ipynb | 2 +- example_tables/ENA_template_samples.tsv | 6 +- receipt.xml | 12 ++++ requirements.txt | 3 +- setup.py | 2 +- tests/test_ena_objects.py | 10 ++- var/xml_converter.py | 4 +- 84 files changed, 1467 insertions(+), 151 deletions(-) rename {ena_objects => ena_upload/json_parsing}/__init__.py (100%) rename {ena_objects => 
ena_upload/json_parsing}/characteristic.py (97%) rename {ena_objects => ena_upload/json_parsing}/ena_experiment.py (97%) rename {ena_objects => ena_upload/json_parsing}/ena_run.py (98%) rename {ena_objects => ena_upload/json_parsing}/ena_sample.py (96%) rename {ena_objects => ena_upload/json_parsing}/ena_std_lib.py (100%) rename {ena_objects => ena_upload/json_parsing}/ena_study.py (96%) rename {ena_objects => ena_upload/json_parsing}/ena_submission.py (87%) rename {ena_objects => ena_upload/json_parsing}/json_schemas/assay_schema.json (100%) rename {ena_objects => ena_upload/json_parsing}/json_schemas/comment_schema.json (100%) rename {ena_objects => ena_upload/json_parsing}/json_schemas/data_schema.json (100%) rename {ena_objects => ena_upload/json_parsing}/json_schemas/factor_schema.json (100%) rename {ena_objects => ena_upload/json_parsing}/json_schemas/factor_value_schema.json (100%) rename {ena_objects => ena_upload/json_parsing}/json_schemas/investigation_schema.json (100%) rename {ena_objects => ena_upload/json_parsing}/json_schemas/material_attribute_schema.json (100%) rename {ena_objects => ena_upload/json_parsing}/json_schemas/material_attribute_value_schema.json (100%) rename {ena_objects => ena_upload/json_parsing}/json_schemas/material_schema.json (100%) rename {ena_objects => ena_upload/json_parsing}/json_schemas/ontology_annotation_schema.json (100%) rename {ena_objects => ena_upload/json_parsing}/json_schemas/ontology_source_reference_schema.json (100%) rename {ena_objects => ena_upload/json_parsing}/json_schemas/organization_schema.json (100%) rename {ena_objects => ena_upload/json_parsing}/json_schemas/person_schema.json (100%) rename {ena_objects => ena_upload/json_parsing}/json_schemas/process_parameter_value_schema.json (100%) rename {ena_objects => ena_upload/json_parsing}/json_schemas/process_schema.json (100%) rename {ena_objects => ena_upload/json_parsing}/json_schemas/protocol_parameter_schema.json (100%) rename {ena_objects => 
ena_upload/json_parsing}/json_schemas/protocol_schema.json (100%) rename {ena_objects => ena_upload/json_parsing}/json_schemas/publication_schema.json (100%) rename {ena_objects => ena_upload/json_parsing}/json_schemas/sample_schema.json (100%) rename {ena_objects => ena_upload/json_parsing}/json_schemas/source_schema.json (100%) rename {ena_objects => ena_upload/json_parsing}/json_schemas/study_schema.json (100%) rename {ena_objects => ena_upload/json_parsing}/other_material.py (96%) create mode 100644 receipt.xml diff --git a/README.md b/README.md index 92a41bd..f819669 100644 --- a/README.md +++ b/README.md @@ -94,13 +94,13 @@ You can specify ENA sample checklist using the `--checklist` parameter. By defau The command line tool will automatically fetch the correct scientific name based on the taxon ID or fetch the taxon ID based on the scientific name. Both can be given and no overwrite will be done. -- Mandatory: *alias*, *title*, *sample_description* and either *scientific_name* or *taxon_id* (preferred) -- Optional: *common_name* +- Mandatory: *alias*, *title*, *sample_description*, *collection date*, *geographic location (country and/or sea)* and either *scientific_name* or *taxon_id* (preferred) +- Optional: *common_name*, *sample_description* -| alias | title | taxon_id | scientific_name | common_name | sample_description | -|----------------|----------------|----------|-------------------------------------------------|-------------|----------------------| -| sample_alias_4 | sample_title_2 | 2697049 | Severe acute respiratory syndrome coronavirus 2 | covid-19 | sample_description_1 | -| sample_alias_5 | sample_title_3 | 2697049 | Severe acute respiratory syndrome coronavirus 2 | covid-19 | sample_description_2 | +| alias | title | taxon_id | scientific_name | common_name | sample_description | collection date | geographic location (country and/or sea) | 
+|----------------|----------------|----------|-------------------------------------------------|-------------|----------------------|-----------------|------------------------------------------| +| sample_alias_4 | sample_title_2 | 2697049 | Severe acute respiratory syndrome coronavirus 2 | covid-19 | sample_description_1 | 2020-10-11 | Argentina | +| sample_alias_5 | sample_title_3 | 2697049 | Severe acute respiratory syndrome coronavirus 2 | covid-19 | sample_description_2 | 2008-01-24 | Belgium | #### Viral submissions @@ -133,13 +133,14 @@ Currently we refer to the [ENA Webin](https://wwwdev.ebi.ac.uk/ena/submit/webin/ | sample_alias | mandatory | Pick a sample to associate this experiment with. The sample may be an individual or a pool, depending on how it is specified. | | | design_description | mandatory | Goal and setup of the individual library including library was constructed. | | | spot_descriptor | optional | The SPOT_DESCRIPTOR specifies how to decode the individual reads of interest from the monolithic spot sequence. The spot descriptor contains aspects of the experimental design, platform, and processing information. There will be two methods of specification: one will be an index into a table of typical decodings, the other being an exact specification. This construct is needed for loading data and for interpreting the loaded runs. It can be omitted if the loader can infer read layout (from multiple input files or from one input files). | | -| library_name | mandatory | The submitter's name for this library. | | +| library_name | optional | The submitter's name for this library. | | | library_layout | mandatory | LIBRARY_LAYOUT specifies whether to expect single, paired, or other configuration of reads. In the case of paired reads, information about the relative distance and orientation is specified. | yes | | insert_size | mandatory | Relative distance. 
| | | library_strategy | mandatory | Sequencing technique intended for this library | yes | | library_source | mandatory | The LIBRARY_SOURCE specifies the type of source material that is being sequenced. | yes | | library_selection | mandatory | Method used to enrich the target in the sequence library preparation | yes | | platform | mandatory | The PLATFORM record selects which sequencing platform and platform-specific runtime parameters. This will be determined by the Center. | yes | +| instrument_model | mandatory | Model of the sequencing instrument. | yes | | library_construction_protocol | optional | Free form text describing the protocol by which the sequencing library was constructed. | | diff --git a/ena_upload/_version.py b/ena_upload/_version.py index 63af887..364e7ba 100644 --- a/ena_upload/_version.py +++ b/ena_upload/_version.py @@ -1 +1 @@ -__version__ = "0.6.3" +__version__ = "0.6.4" diff --git a/ena_upload/ena_upload.py b/ena_upload/ena_upload.py index fa601e7..dc07097 100755 --- a/ena_upload/ena_upload.py +++ b/ena_upload/ena_upload.py @@ -12,6 +12,7 @@ import hashlib import ftplib import requests +import json import uuid import numpy as np import re @@ -21,6 +22,8 @@ import tempfile from ena_upload._version import __version__ from ena_upload.check_remote import remote_check +from ena_upload.json_parsing.ena_submission import EnaSubmission + SCHEMA_TYPES = ['study', 'experiment', 'run', 'sample'] @@ -55,7 +58,7 @@ def create_dataframe(schema_tables, action, dev, auto_action): schema_dataframe = {} for schema, table in schema_tables.items(): - df = pd.read_csv(table, sep='\t', comment='#', dtype=str) + df = pd.read_csv(table, sep='\t', comment='#', dtype=str, na_values=["NA", "Na", "na", "NaN"]) df = df.dropna(how='all') df = check_columns(df, schema, action, dev, auto_action) schema_dataframe[schema] = df @@ -294,7 +297,7 @@ def run_construct(template_path, schema_targets, center, checklist, tool): template = templates[schema] Template = 
loader.load(template) stream = generate_stream(schema, targets, Template, center, tool) - + print(f"Constructing XML for '{schema}' schema") schema_xmls[schema] = construct_xml(schema, stream, xsds[schema]) return schema_xmls @@ -315,7 +318,7 @@ def construct_submission(template_path, action, submission_input, center, checkl :return submission_xml: filename of submission XML ''' - print("Constructing submission") + print(f"Constructing XML for submission schema") xsds, templates = actors(template_path, checklist) @@ -325,7 +328,6 @@ def construct_submission(template_path, action, submission_input, center, checkl stream = Template.generate(action=action, input=submission_input, center=center, tool_name=tool['tool_name'], tool_version=tool['tool_version']) - submission_xml = construct_xml('submission', stream, xsds['submission']) return submission_xml @@ -713,6 +715,12 @@ def process_args(): parser.add_argument('--xlsx', help='filled in excel template with metadata') + + parser.add_argument('--isa_json', + help='ISA json describing describing the ENA objects') + + parser.add_argument('--isa_assay_stream', + help='specify the assay stream that holds the ENA information') parser.add_argument('--auto_action', action="store_true", @@ -750,7 +758,7 @@ def process_args(): # check if any table is given tables = set([args.study, args.sample, args.experiment, args.run]) - if tables == {None} and not args.xlsx: + if tables == {None} and not args.xlsx and not args.isa_json: parser.error('Requires at least one table for submission') # check if .secret file exists @@ -765,6 +773,14 @@ def process_args(): msg = f"Oops, the file {args.xlsx} does not exist" parser.error(msg) + # check if ISA json file exists + if args.isa_json: + if not os.path.isfile(args.isa_json): + msg = f"Oops, the file {args.isa_json} does not exist" + parser.error(msg) + if args.isa_assay_stream is None : + parser.error("--isa_json requires --isa_assay_stream") + # check if data is given when adding a 'run' 
table if (not args.no_data_upload and args.run and args.action.upper() not in ['RELEASE', 'CANCEL']) or (not args.no_data_upload and args.xlsx and args.action.upper() not in ['RELEASE', 'CANCEL']): if args.data is None: @@ -817,6 +833,8 @@ def main(): secret = args.secret draft = args.draft xlsx = args.xlsx + isa_json_file = args.isa_json + isa_assay_stream = args.isa_assay_stream auto_action = args.auto_action with open(secret, 'r') as secret_file: @@ -838,9 +856,9 @@ def main(): for schema in SCHEMA_TYPES: if schema in xl_workbook.book.sheetnames: - xl_sheet = xl_workbook.parse(schema, header=0) + xl_sheet = xl_workbook.parse(schema, header=0, na_values=["NA", "Na", "na", "NaN"]) elif f"ENA_{schema}" in xl_workbook.book.sheetnames: - xl_sheet = xl_workbook.parse(f"ENA_{schema}", header=0) + xl_sheet = xl_workbook.parse(f"ENA_{schema}", header=0, na_values=["NA", "Na", "na", "NaN"]) else: sys.exit( f"The sheet '{schema}' is not present in the excel sheet {xlsx}") @@ -858,6 +876,22 @@ def main(): schema_dataframe[schema] = xl_sheet path = os.path.dirname(os.path.abspath(xlsx)) schema_tables[schema] = f"{path}/ENA_template_{schema}.tsv" + elif isa_json_file: + # Read json file + isa_json = json.load(open(isa_json_file)) + + schema_tables = {} + schema_dataframe = {} + required_assays = [{"assay_stream": isa_assay_stream}] + submission = EnaSubmission.from_isa_json(isa_json, required_assays) + submission_dataframes = submission.generate_dataframes() + for schema, df in submission_dataframes.items(): + schema_dataframe[schema] = check_columns( + df, schema, action, dev, auto_action) + path = os.path.dirname(os.path.abspath(isa_json_file)) + schema_tables[schema] = f"{path}/ENA_template_{schema}.tsv" + + else: # collect the schema with table input from command-line schema_tables = collect_tables(args) diff --git a/ena_objects/__init__.py b/ena_upload/json_parsing/__init__.py similarity index 100% rename from ena_objects/__init__.py rename to 
ena_upload/json_parsing/__init__.py diff --git a/ena_objects/characteristic.py b/ena_upload/json_parsing/characteristic.py similarity index 97% rename from ena_objects/characteristic.py rename to ena_upload/json_parsing/characteristic.py index 2fb22bf..84b2b60 100644 --- a/ena_objects/characteristic.py +++ b/ena_upload/json_parsing/characteristic.py @@ -11,7 +11,7 @@ class IsaBase: @classmethod def validate_json(self, isa_json: Dict[str, str], schema): - schema_path = os.path.join(os.curdir, "ena_objects", "json_schemas", schema) + schema_path = os.path.join(os.curdir, "ena_upload", "json_parsing", "json_schemas", schema) json_file = open(schema_path) json_schema = json.load(json_file) diff --git a/ena_objects/ena_experiment.py b/ena_upload/json_parsing/ena_experiment.py similarity index 97% rename from ena_objects/ena_experiment.py rename to ena_upload/json_parsing/ena_experiment.py index cdc7ac6..c177213 100644 --- a/ena_objects/ena_experiment.py +++ b/ena_upload/json_parsing/ena_experiment.py @@ -2,18 +2,18 @@ from pandas import DataFrame -from ena_objects.ena_std_lib import ( +from ena_upload.json_parsing.ena_std_lib import ( fetch_assay_comment_by_name, get_assay_sample_associations, clip_off_prefix, ) -from ena_objects.characteristic import ( +from ena_upload.json_parsing.characteristic import ( IsaBase, OtherMaterialCharacteristic, ParameterValue, ) -from ena_objects.ena_sample import EnaSample -from ena_objects.other_material import OtherMaterial +from ena_upload.json_parsing.ena_sample import EnaSample +from ena_upload.json_parsing.other_material import OtherMaterial def experiment_alias(other_material: OtherMaterial, prefix: str) -> str: diff --git a/ena_objects/ena_run.py b/ena_upload/json_parsing/ena_run.py similarity index 98% rename from ena_objects/ena_run.py rename to ena_upload/json_parsing/ena_run.py index 058bf29..d59e763 100644 --- a/ena_objects/ena_run.py +++ b/ena_upload/json_parsing/ena_run.py @@ -2,8 +2,8 @@ from pandas import DataFrame 
-from ena_objects.characteristic import IsaBase -from ena_objects.ena_std_lib import ( +from ena_upload.json_parsing.characteristic import IsaBase +from ena_upload.json_parsing.ena_std_lib import ( fetch_assay_comment_by_name, get_assay_sample_associations, clip_off_prefix, diff --git a/ena_objects/ena_sample.py b/ena_upload/json_parsing/ena_sample.py similarity index 96% rename from ena_objects/ena_sample.py rename to ena_upload/json_parsing/ena_sample.py index 8248878..b68beba 100644 --- a/ena_objects/ena_sample.py +++ b/ena_upload/json_parsing/ena_sample.py @@ -1,9 +1,9 @@ from typing import List, Dict -from ena_objects.characteristic import SampleCharacteristic +from ena_upload.json_parsing.characteristic import SampleCharacteristic from pandas import DataFrame -from ena_objects.ena_std_lib import clip_off_prefix, fetch_study_comment_by_name +from ena_upload.json_parsing.ena_std_lib import clip_off_prefix, fetch_study_comment_by_name def fetch_characteristic_categories(study_dict: Dict) -> Dict: diff --git a/ena_objects/ena_std_lib.py b/ena_upload/json_parsing/ena_std_lib.py similarity index 100% rename from ena_objects/ena_std_lib.py rename to ena_upload/json_parsing/ena_std_lib.py diff --git a/ena_objects/ena_study.py b/ena_upload/json_parsing/ena_study.py similarity index 96% rename from ena_objects/ena_study.py rename to ena_upload/json_parsing/ena_study.py index e589a71..10d1798 100644 --- a/ena_objects/ena_study.py +++ b/ena_upload/json_parsing/ena_study.py @@ -1,7 +1,7 @@ from typing import List, Optional, Dict from pandas import DataFrame -from ena_objects.characteristic import IsaBase -from ena_objects.ena_std_lib import ( +from ena_upload.json_parsing.characteristic import IsaBase +from ena_upload.json_parsing.ena_std_lib import ( clip_off_prefix, fetch_assay_comment_by_name, ) diff --git a/ena_objects/ena_submission.py b/ena_upload/json_parsing/ena_submission.py similarity index 87% rename from ena_objects/ena_submission.py rename to 
ena_upload/json_parsing/ena_submission.py index 01f3340..9ab92bb 100644 --- a/ena_objects/ena_submission.py +++ b/ena_upload/json_parsing/ena_submission.py @@ -1,20 +1,20 @@ from typing import List, Dict from pandas import DataFrame -from ena_objects.characteristic import IsaBase -from ena_objects.ena_experiment import ( +from ena_upload.json_parsing.characteristic import IsaBase +from ena_upload.json_parsing.ena_experiment import ( EnaExperiment, export_experiments_to_dataframe, ) -from ena_objects.ena_run import EnaRun, export_runs_to_dataframe -from ena_objects.ena_sample import EnaSample, export_samples_to_dataframe -from ena_objects.ena_std_lib import ( +from ena_upload.json_parsing.ena_run import EnaRun, export_runs_to_dataframe +from ena_upload.json_parsing.ena_sample import EnaSample, export_samples_to_dataframe +from ena_upload.json_parsing.ena_std_lib import ( fetch_assay_streams, fetch_study_comment_by_name, study_publication_ids, ) -from ena_objects.ena_study import EnaStudy, export_studies_to_dataframe +from ena_upload.json_parsing.ena_study import EnaStudy, export_studies_to_dataframe def fetch_assay(assay, required_assays): @@ -143,7 +143,7 @@ def generate_dataframes(self) -> Dict[str, DataFrame]: """ return { "study": export_studies_to_dataframe(self.studies), - "samples": export_samples_to_dataframe(self.samples), - "experiments": export_experiments_to_dataframe(self.experiments), - "runs": export_runs_to_dataframe(self.runs), + "sample": export_samples_to_dataframe(self.samples), + "experiment": export_experiments_to_dataframe(self.experiments), + "run": export_runs_to_dataframe(self.runs), } diff --git a/ena_objects/json_schemas/assay_schema.json b/ena_upload/json_parsing/json_schemas/assay_schema.json similarity index 100% rename from ena_objects/json_schemas/assay_schema.json rename to ena_upload/json_parsing/json_schemas/assay_schema.json diff --git a/ena_objects/json_schemas/comment_schema.json 
b/ena_upload/json_parsing/json_schemas/comment_schema.json similarity index 100% rename from ena_objects/json_schemas/comment_schema.json rename to ena_upload/json_parsing/json_schemas/comment_schema.json diff --git a/ena_objects/json_schemas/data_schema.json b/ena_upload/json_parsing/json_schemas/data_schema.json similarity index 100% rename from ena_objects/json_schemas/data_schema.json rename to ena_upload/json_parsing/json_schemas/data_schema.json diff --git a/ena_objects/json_schemas/factor_schema.json b/ena_upload/json_parsing/json_schemas/factor_schema.json similarity index 100% rename from ena_objects/json_schemas/factor_schema.json rename to ena_upload/json_parsing/json_schemas/factor_schema.json diff --git a/ena_objects/json_schemas/factor_value_schema.json b/ena_upload/json_parsing/json_schemas/factor_value_schema.json similarity index 100% rename from ena_objects/json_schemas/factor_value_schema.json rename to ena_upload/json_parsing/json_schemas/factor_value_schema.json diff --git a/ena_objects/json_schemas/investigation_schema.json b/ena_upload/json_parsing/json_schemas/investigation_schema.json similarity index 100% rename from ena_objects/json_schemas/investigation_schema.json rename to ena_upload/json_parsing/json_schemas/investigation_schema.json diff --git a/ena_objects/json_schemas/material_attribute_schema.json b/ena_upload/json_parsing/json_schemas/material_attribute_schema.json similarity index 100% rename from ena_objects/json_schemas/material_attribute_schema.json rename to ena_upload/json_parsing/json_schemas/material_attribute_schema.json diff --git a/ena_objects/json_schemas/material_attribute_value_schema.json b/ena_upload/json_parsing/json_schemas/material_attribute_value_schema.json similarity index 100% rename from ena_objects/json_schemas/material_attribute_value_schema.json rename to ena_upload/json_parsing/json_schemas/material_attribute_value_schema.json diff --git a/ena_objects/json_schemas/material_schema.json 
b/ena_upload/json_parsing/json_schemas/material_schema.json similarity index 100% rename from ena_objects/json_schemas/material_schema.json rename to ena_upload/json_parsing/json_schemas/material_schema.json diff --git a/ena_objects/json_schemas/ontology_annotation_schema.json b/ena_upload/json_parsing/json_schemas/ontology_annotation_schema.json similarity index 100% rename from ena_objects/json_schemas/ontology_annotation_schema.json rename to ena_upload/json_parsing/json_schemas/ontology_annotation_schema.json diff --git a/ena_objects/json_schemas/ontology_source_reference_schema.json b/ena_upload/json_parsing/json_schemas/ontology_source_reference_schema.json similarity index 100% rename from ena_objects/json_schemas/ontology_source_reference_schema.json rename to ena_upload/json_parsing/json_schemas/ontology_source_reference_schema.json diff --git a/ena_objects/json_schemas/organization_schema.json b/ena_upload/json_parsing/json_schemas/organization_schema.json similarity index 100% rename from ena_objects/json_schemas/organization_schema.json rename to ena_upload/json_parsing/json_schemas/organization_schema.json diff --git a/ena_objects/json_schemas/person_schema.json b/ena_upload/json_parsing/json_schemas/person_schema.json similarity index 100% rename from ena_objects/json_schemas/person_schema.json rename to ena_upload/json_parsing/json_schemas/person_schema.json diff --git a/ena_objects/json_schemas/process_parameter_value_schema.json b/ena_upload/json_parsing/json_schemas/process_parameter_value_schema.json similarity index 100% rename from ena_objects/json_schemas/process_parameter_value_schema.json rename to ena_upload/json_parsing/json_schemas/process_parameter_value_schema.json diff --git a/ena_objects/json_schemas/process_schema.json b/ena_upload/json_parsing/json_schemas/process_schema.json similarity index 100% rename from ena_objects/json_schemas/process_schema.json rename to ena_upload/json_parsing/json_schemas/process_schema.json diff --git 
a/ena_objects/json_schemas/protocol_parameter_schema.json b/ena_upload/json_parsing/json_schemas/protocol_parameter_schema.json similarity index 100% rename from ena_objects/json_schemas/protocol_parameter_schema.json rename to ena_upload/json_parsing/json_schemas/protocol_parameter_schema.json diff --git a/ena_objects/json_schemas/protocol_schema.json b/ena_upload/json_parsing/json_schemas/protocol_schema.json similarity index 100% rename from ena_objects/json_schemas/protocol_schema.json rename to ena_upload/json_parsing/json_schemas/protocol_schema.json diff --git a/ena_objects/json_schemas/publication_schema.json b/ena_upload/json_parsing/json_schemas/publication_schema.json similarity index 100% rename from ena_objects/json_schemas/publication_schema.json rename to ena_upload/json_parsing/json_schemas/publication_schema.json diff --git a/ena_objects/json_schemas/sample_schema.json b/ena_upload/json_parsing/json_schemas/sample_schema.json similarity index 100% rename from ena_objects/json_schemas/sample_schema.json rename to ena_upload/json_parsing/json_schemas/sample_schema.json diff --git a/ena_objects/json_schemas/source_schema.json b/ena_upload/json_parsing/json_schemas/source_schema.json similarity index 100% rename from ena_objects/json_schemas/source_schema.json rename to ena_upload/json_parsing/json_schemas/source_schema.json diff --git a/ena_objects/json_schemas/study_schema.json b/ena_upload/json_parsing/json_schemas/study_schema.json similarity index 100% rename from ena_objects/json_schemas/study_schema.json rename to ena_upload/json_parsing/json_schemas/study_schema.json diff --git a/ena_objects/other_material.py b/ena_upload/json_parsing/other_material.py similarity index 96% rename from ena_objects/other_material.py rename to ena_upload/json_parsing/other_material.py index a962106..9c8ba31 100644 --- a/ena_objects/other_material.py +++ b/ena_upload/json_parsing/other_material.py @@ -1,4 +1,4 @@ -from ena_objects.characteristic import ( +from 
ena_upload.json_parsing.characteristic import ( IsaBase, ParameterValue, OtherMaterialCharacteristic, diff --git a/ena_upload/templates/ENA_template_experiments.xml b/ena_upload/templates/ENA_template_experiments.xml index b6aaf18..9f87bab 100755 --- a/ena_upload/templates/ENA_template_experiments.xml +++ b/ena_upload/templates/ENA_template_experiments.xml @@ -1,11 +1,16 @@ + ${row.title} + + + + ${row.design_description} + ${row.spot_descriptor} + + + ${row.library_name} + + + + + + + + + ${row.library_construction_protocol} + + + + SUBMISSION_TOOL diff --git a/ena_upload/templates/ENA_template_runs.xml b/ena_upload/templates/ENA_template_runs.xml index 4c6d2f6..134d017 100755 --- a/ena_upload/templates/ENA_template_runs.xml +++ b/ena_upload/templates/ENA_template_runs.xml @@ -1,11 +1,13 @@ + + diff --git a/ena_upload/templates/ENA_template_samples_ERC000011.xml b/ena_upload/templates/ENA_template_samples_ERC000011.xml index 762acc9..0eba21f 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000011.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000011.xml @@ -1,26 +1,37 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + @@ -70,14 +81,18 @@ def attributetest(row, column): ${row['collected_by']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + geographic location (region and locality) diff --git a/ena_upload/templates/ENA_template_samples_ERC000012.xml b/ena_upload/templates/ENA_template_samples_ERC000012.xml index 2bdefef..f445cbf 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000012.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000012.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -195,47 +208,63 @@ def attributetest(row, column): 
${row['positive control type']} + collection date ${row['collection date']} + + altitude ${row['altitude']} m + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) ${row['geographic location (region and locality)']} + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + elevation diff --git a/ena_upload/templates/ENA_template_samples_ERC000013.xml b/ena_upload/templates/ENA_template_samples_ERC000013.xml index aebff11..c37b980 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000013.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000013.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -195,10 +208,12 @@ def attributetest(row, column): ${row['positive control type']} + collection date ${row['collection date']} + altitude @@ -206,20 +221,26 @@ def attributetest(row, column): m + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) @@ -233,18 +254,24 @@ def attributetest(row, column): m + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + elevation diff --git a/ena_upload/templates/ENA_template_samples_ERC000014.xml 
b/ena_upload/templates/ENA_template_samples_ERC000014.xml index 3868fe1..518bb60 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000014.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000014.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -201,42 +214,56 @@ def attributetest(row, column): ${row['positive control type']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) ${row['geographic location (region and locality)']} + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + source material identifiers diff --git a/ena_upload/templates/ENA_template_samples_ERC000015.xml b/ena_upload/templates/ENA_template_samples_ERC000015.xml index 82e9129..e7e2be0 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000015.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000015.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -195,42 +208,56 @@ def attributetest(row, column): ${row['positive control type']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location 
(region and locality) ${row['geographic location (region and locality)']} + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + source material identifiers diff --git a/ena_upload/templates/ENA_template_samples_ERC000016.xml b/ena_upload/templates/ENA_template_samples_ERC000016.xml index 6a20547..9003c92 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000016.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000016.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -195,42 +208,56 @@ def attributetest(row, column): ${row['positive control type']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) ${row['geographic location (region and locality)']} + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + source material identifiers diff --git a/ena_upload/templates/ENA_template_samples_ERC000017.xml b/ena_upload/templates/ENA_template_samples_ERC000017.xml index b0c021a..d731a53 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000017.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000017.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -195,42 +208,56 @@ def 
attributetest(row, column): ${row['positive control type']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) ${row['geographic location (region and locality)']} + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + source material identifiers diff --git a/ena_upload/templates/ENA_template_samples_ERC000018.xml b/ena_upload/templates/ENA_template_samples_ERC000018.xml index d1c765a..1852f93 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000018.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000018.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -195,42 +208,56 @@ def attributetest(row, column): ${row['positive control type']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) ${row['geographic location (region and locality)']} + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + source material identifiers diff --git a/ena_upload/templates/ENA_template_samples_ERC000019.xml 
b/ena_upload/templates/ENA_template_samples_ERC000019.xml index 921a17e..c1bc72f 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000019.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000019.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -195,10 +208,12 @@ def attributetest(row, column): ${row['positive control type']} + collection date ${row['collection date']} + altitude @@ -206,48 +221,64 @@ def attributetest(row, column): m + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) ${row['geographic location (region and locality)']} + depth ${row['depth']} m + + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + + elevation ${row['elevation']} m + source material identifiers diff --git a/ena_upload/templates/ENA_template_samples_ERC000020.xml b/ena_upload/templates/ENA_template_samples_ERC000020.xml index 8e932a5..d5ac689 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000020.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000020.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -195,10 +208,12 @@ def attributetest(row, column): ${row['positive control type']} + collection date ${row['collection date']} + altitude @@ -206,20 +221,26 @@ def attributetest(row, column): m + geographic location (country and/or sea) ${row['geographic location (country and/or 
sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) @@ -233,18 +254,24 @@ def attributetest(row, column): m + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + elevation diff --git a/ena_upload/templates/ENA_template_samples_ERC000021.xml b/ena_upload/templates/ENA_template_samples_ERC000021.xml index 7ecc03d..92b3f83 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000021.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000021.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -195,10 +208,12 @@ def attributetest(row, column): ${row['positive control type']} + collection date ${row['collection date']} + altitude @@ -206,48 +221,64 @@ def attributetest(row, column): m + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) ${row['geographic location (region and locality)']} + depth ${row['depth']} m + + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + + elevation ${row['elevation']} m + source material identifiers diff --git a/ena_upload/templates/ENA_template_samples_ERC000022.xml b/ena_upload/templates/ENA_template_samples_ERC000022.xml index d222394..1d2f41e 100644 --- 
a/ena_upload/templates/ENA_template_samples_ERC000022.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000022.xml @@ -1,26 +1,37 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + @@ -41,10 +52,12 @@ def attributetest(row, column): ${row['profile position']} + project name ${row['project name']} + experimental factor @@ -220,10 +233,12 @@ def attributetest(row, column): ${row['positive control type']} + collection date ${row['collection date']} + altitude @@ -231,48 +246,64 @@ def attributetest(row, column): m + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) ${row['geographic location (region and locality)']} + depth ${row['depth']} m + + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + + elevation ${row['elevation']} m + source material identifiers diff --git a/ena_upload/templates/ENA_template_samples_ERC000023.xml b/ena_upload/templates/ENA_template_samples_ERC000023.xml index 4e65e48..ce20952 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000023.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000023.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -195,24 +208,32 @@ def attributetest(row, column): ${row['positive control type']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location 
(latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) @@ -226,18 +247,24 @@ def attributetest(row, column): m + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + source material identifiers diff --git a/ena_upload/templates/ENA_template_samples_ERC000024.xml b/ena_upload/templates/ENA_template_samples_ERC000024.xml index cb10191..0e4fb55 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000024.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000024.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -195,10 +208,12 @@ def attributetest(row, column): ${row['positive control type']} + collection date ${row['collection date']} + altitude @@ -206,43 +221,57 @@ def attributetest(row, column): m + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) ${row['geographic location (region and locality)']} + depth ${row['depth']} m + + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + elevation diff --git a/ena_upload/templates/ENA_template_samples_ERC000025.xml b/ena_upload/templates/ENA_template_samples_ERC000025.xml index f837269..373c61e 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000025.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000025.xml @@ -1,31 +1,44 @@ + 
${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -195,10 +208,12 @@ def attributetest(row, column): ${row['positive control type']} + collection date ${row['collection date']} + altitude @@ -206,20 +221,26 @@ def attributetest(row, column): m + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) @@ -233,18 +254,24 @@ def attributetest(row, column): m + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + elevation diff --git a/ena_upload/templates/ENA_template_samples_ERC000027.xml b/ena_upload/templates/ENA_template_samples_ERC000027.xml index 49ab6e8..7af98df 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000027.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000027.xml @@ -1,37 +1,52 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + Latitude Start ${row['Latitude Start']} DD + + Longitude Start ${row['Longitude Start']} DD + Latitude End @@ -46,19 +61,25 @@ def attributetest(row, column): DD + Depth ${row['Depth']} m + + Protocol Label ${row['Protocol Label']} + + project name ${row['project name']} + experimental factor @@ -216,42 +237,60 @@ def attributetest(row, column): ${row['relevant standard operating procedures']} + environmental package ${row['environmental package']} + + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + broad-scale environmental context ${row['broad-scale 
environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + + Sampling Campaign ${row['Sampling Campaign']} + + Sampling Site ${row['Sampling Site']} + + Sampling Platform ${row['Sampling Platform']} + source material identifiers @@ -335,20 +374,26 @@ def attributetest(row, column): ${row['sample storage location']} + Marine Region ${row['Marine Region']} + + Temperature ${row['Temperature']} ºC + + Salinity ${row['Salinity']} psu + specific host diff --git a/ena_upload/templates/ENA_template_samples_ERC000028.xml b/ena_upload/templates/ENA_template_samples_ERC000028.xml index 3b8e283..dad6a27 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000028.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000028.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + isolation_source ${row['isolation_source']} + lat_lon @@ -38,14 +51,18 @@ def attributetest(row, column): ${row['collected_by']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + geographic location (region and locality) @@ -70,20 +87,24 @@ def attributetest(row, column): ${row['mating_type']} + host health state ${row['host health state']} + lab_host ${row['lab_host']} + host scientific name ${row['host scientific name']} + bio_material @@ -102,10 +123,12 @@ def attributetest(row, column): ${row['specimen_voucher']} + isolate ${row['isolate']} + sub_species diff --git a/ena_upload/templates/ENA_template_samples_ERC000029.xml b/ena_upload/templates/ENA_template_samples_ERC000029.xml index ae32d8a..6df4506 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000029.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000029.xml @@ -1,26 +1,37 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + 
${row.sample_description} + @@ -52,14 +63,18 @@ def attributetest(row, column): ${row['country of travel']} + collected_by ${row['collected_by']} + + collection date ${row['collection date']} + altitude @@ -67,20 +82,26 @@ def attributetest(row, column): m + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) @@ -107,10 +128,12 @@ def attributetest(row, column): m3 + environmental_sample ${row['environmental_sample']} + mating_type @@ -166,10 +189,12 @@ def attributetest(row, column): ${row['host life stage']} + host health state ${row['host health state']} + host sex @@ -182,10 +207,12 @@ def attributetest(row, column): ${row['lab_host']} + host scientific name ${row['host scientific name']} + passage_history @@ -198,10 +225,12 @@ def attributetest(row, column): ${row['sample storage conditions']} + Is the sequenced pathogen host associated? 
${row['Is the sequenced pathogen host associated?']} + bio_material @@ -220,10 +249,12 @@ def attributetest(row, column): ${row['specimen_voucher']} + isolate ${row['isolate']} + sub_species diff --git a/ena_upload/templates/ENA_template_samples_ERC000030.xml b/ena_upload/templates/ENA_template_samples_ERC000030.xml index ec074fd..bf37fa7 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000030.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000030.xml @@ -1,26 +1,37 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + @@ -40,16 +51,20 @@ def attributetest(row, column): ${row['Event Date/Time End']} + Latitude Start ${row['Latitude Start']} DD + + Longitude Start ${row['Longitude Start']} DD + Latitude End @@ -64,21 +79,25 @@ def attributetest(row, column): DD + Depth ${row['Depth']} m + Sample Collection Device ${row['Sample Collection Device']} + Protocol Label ${row['Protocol Label']} + Size Fraction Lower Threshold @@ -103,55 +122,79 @@ def attributetest(row, column): ${row['Last Update Date']} + project name ${row['project name']} + + environmental package ${row['environmental package']} + + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + + Sampling Campaign ${row['Sampling Campaign']} + + Sampling Station ${row['Sampling Station']} + + Sampling Platform ${row['Sampling Platform']} + + Marine Region ${row['Marine Region']} + + Salinity Sensor ${row['Salinity Sensor']} psu + Oxygen Sensor @@ -166,11 +209,13 @@ def attributetest(row, column): µmol/L + Temperature ${row['Temperature']} ºC + Chlorophyll Sensor diff --git a/ena_upload/templates/ENA_template_samples_ERC000031.xml 
b/ena_upload/templates/ENA_template_samples_ERC000031.xml index 85b7d09..ba34566 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000031.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000031.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -195,42 +208,56 @@ def attributetest(row, column): ${row['positive control type']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) ${row['geographic location (region and locality)']} + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + surface material @@ -249,56 +276,78 @@ def attributetest(row, column): ${row['indoor surface']} + indoor space ${row['indoor space']} + + filter type ${row['filter type']} + + heating and cooling system type ${row['heating and cooling system type']} + substructure type ${row['substructure type']} + light type ${row['light type']} + + building setting ${row['building setting']} + + building occupancy type ${row['building occupancy type']} + + space typical state ${row['space typical state']} + + typical occupant density ${row['typical occupant density']} + + occupancy at sampling ${row['occupancy at sampling']} + + occupant density at sampling ${row['occupant density at sampling']} + + ventilation type ${row['ventilation type']} + source material identifiers @@ -336,10 +385,12 @@ def attributetest(row, column): ${row['sample size sorting method']} + organism count ${row['organism count']} + sample 
collection device @@ -364,16 +415,20 @@ def attributetest(row, column): ${row['host scientific name']} + relative air humidity ${row['relative air humidity']} % + + absolute air humidity ${row['absolute air humidity']} kg + surface humidity @@ -381,11 +436,13 @@ def attributetest(row, column): % + air temperature ${row['air temperature']} ºC + surface temperature @@ -413,11 +470,13 @@ def attributetest(row, column): ºC + carbon dioxide ${row['carbon dioxide']} µmol/L + subspecific genetic lineage diff --git a/ena_upload/templates/ENA_template_samples_ERC000032.xml b/ena_upload/templates/ENA_template_samples_ERC000032.xml index 2fdce8d..25ca461 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000032.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000032.xml @@ -1,26 +1,37 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + @@ -160,14 +171,18 @@ def attributetest(row, column): ${row['illness symptoms']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + geographic location (latitude) @@ -200,14 +215,18 @@ def attributetest(row, column): ${row['host disease outcome']} + host common name ${row['host common name']} + + host subject id ${row['host subject id']} + host age @@ -215,34 +234,48 @@ def attributetest(row, column): years + host health state ${row['host health state']} + + host sex ${row['host sex']} + + host scientific name ${row['host scientific name']} + + influenza test method ${row['influenza test method']} + + influenza test result ${row['influenza test result']} + + other pathogens tested ${row['other pathogens tested']} + + other pathogens test result ${row['other pathogens test result']} + influenza virus type @@ -273,14 +306,18 @@ def attributetest(row, column): ${row['lineage:swl (required for H1N1 viruses)']} + collector name ${row['collector name']} + + collecting institution 
${row['collecting institution']} + receipt date diff --git a/ena_upload/templates/ENA_template_samples_ERC000033.xml b/ena_upload/templates/ENA_template_samples_ERC000033.xml index f8acbe2..dc93b8d 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000033.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000033.xml @@ -1,26 +1,37 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + @@ -64,14 +75,18 @@ def attributetest(row, column): ${row['illness symptoms']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + geographic location (latitude) @@ -104,14 +119,18 @@ def attributetest(row, column): ${row['host disease outcome']} + host common name ${row['host common name']} + + host subject id ${row['host subject id']} + host age @@ -119,38 +138,48 @@ def attributetest(row, column): years + host health state ${row['host health state']} + + host sex ${row['host sex']} + lab_host ${row['lab_host']} + host scientific name ${row['host scientific name']} + virus identifier ${row['virus identifier']} + collector name ${row['collector name']} + + collecting institution ${row['collecting institution']} + receipt date @@ -175,10 +204,12 @@ def attributetest(row, column): ${row['serotype (required for a seropositive sample)']} + isolate ${row['isolate']} + strain diff --git a/ena_upload/templates/ENA_template_samples_ERC000034.xml b/ena_upload/templates/ENA_template_samples_ERC000034.xml index 4331d37..2ed7797 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000034.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000034.xml @@ -1,43 +1,62 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + tissue_type ${row['tissue_type']} + + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location 
(country and/or sea)']} + + sex ${row['sex']} + date of birth @@ -50,14 +69,18 @@ def attributetest(row, column): ${row['date of death']} + diagnosis ${row['diagnosis']} + + strain ${row['strain']} + tumor grading (OBI_0600002) diff --git a/ena_upload/templates/ENA_template_samples_ERC000035.xml b/ena_upload/templates/ENA_template_samples_ERC000035.xml index b07af40..167931f 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000035.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000035.xml @@ -1,26 +1,37 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + @@ -58,14 +69,18 @@ def attributetest(row, column): ${row['protocol']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + sampling time point diff --git a/ena_upload/templates/ENA_template_samples_ERC000036.xml b/ena_upload/templates/ENA_template_samples_ERC000036.xml index ffa958a..42495ab 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000036.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000036.xml @@ -1,26 +1,37 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + @@ -40,24 +51,30 @@ def attributetest(row, column): ${row['nucleic acid amplification']} + investigation type ${row['investigation type']} + surveillance target ${row['surveillance target']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + geographic location (latitude) @@ -136,10 +153,12 @@ def attributetest(row, column): ${row['receipt date']} + sewage type ${row['sewage type']} + temperature diff --git a/ena_upload/templates/ENA_template_samples_ERC000037.xml b/ena_upload/templates/ENA_template_samples_ERC000037.xml index 7bf0247..a4324bc 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000037.xml +++ 
b/ena_upload/templates/ENA_template_samples_ERC000037.xml @@ -1,26 +1,37 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + @@ -59,10 +70,12 @@ def attributetest(row, column): ${row['collected_by']} + collection date ${row['collection date']} + altitude @@ -70,20 +83,26 @@ def attributetest(row, column): m + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) @@ -140,10 +159,12 @@ def attributetest(row, column): ${row['sample material processing']} + isolation and growth condition ${row['isolation and growth condition']} + propagation @@ -183,14 +204,18 @@ def attributetest(row, column): ${row['sampling time point']} + plant structure ${row['plant structure']} + + plant developmental stage ${row['plant developmental stage']} + sampled age @@ -408,10 +433,12 @@ def attributetest(row, column): ${row['soil pH']} + plant growth medium ${row['plant growth medium']} + rooting conditions diff --git a/ena_upload/templates/ENA_template_samples_ERC000038.xml b/ena_upload/templates/ENA_template_samples_ERC000038.xml index 2842286..e817e3b 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000038.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000038.xml @@ -1,84 +1,119 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + Latitude Start ${row['Latitude Start']} DD + + Longitude Start ${row['Longitude Start']} DD + + Depth ${row['Depth']} m + Sample Collection Device ${row['Sample Collection Device']} + Protocol Label ${row['Protocol Label']} + + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + broad-scale 
environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + + Sampling Campaign ${row['Sampling Campaign']} + + Sampling Station ${row['Sampling Station']} + + Sampling Platform ${row['Sampling Platform']} + storage conditions (fresh/frozen/other) @@ -103,38 +138,52 @@ def attributetest(row, column): ${row['Marine Region']} + seabed habitat ${row['seabed habitat']} + + age ${row['age']} + + aquaculture origin ${row['aquaculture origin']} + + shellfish total weight ${row['shellfish total weight']} g + + shellfish soft tissue weight ${row['shellfish soft tissue weight']} g + + shell length ${row['shell length']} g + + shell width ${row['shell width']} g + adductor weight diff --git a/ena_upload/templates/ENA_template_samples_ERC000039.xml b/ena_upload/templates/ENA_template_samples_ERC000039.xml index 4484ca4..2b7d23f 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000039.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000039.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + dev_stage ${row['dev_stage']} + subject exposure @@ -56,14 +69,18 @@ def attributetest(row, column): ${row['country of travel']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + geographic location (latitude) @@ -133,24 +150,30 @@ def attributetest(row, column): ${row['host scientific name']} + collector name ${row['collector name']} + + collecting institution ${row['collecting institution']} + sample storage conditions ${row['sample storage conditions']} + isolate ${row['isolate']} + strain diff --git a/ena_upload/templates/ENA_template_samples_ERC000040.xml b/ena_upload/templates/ENA_template_samples_ERC000040.xml index 99f30c3..6572dcf 100644 --- 
a/ena_upload/templates/ENA_template_samples_ERC000040.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000040.xml @@ -1,26 +1,37 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + @@ -34,18 +45,24 @@ def attributetest(row, column): ${row['Size Fraction Upper Threshold']} + target gene ${row['target gene']} + + target subfragment ${row['target subfragment']} + + pcr primers ${row['pcr primers']} + isolation_source @@ -58,10 +75,12 @@ def attributetest(row, column): ${row['collected_by']} + collection date ${row['collection date']} + altitude @@ -69,20 +88,26 @@ def attributetest(row, column): m + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) @@ -96,28 +121,36 @@ def attributetest(row, column): m + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + sample collection device or method ${row['sample collection device or method']} + environmental_sample ${row['environmental_sample']} + Salinity diff --git a/ena_upload/templates/ENA_template_samples_ERC000041.xml b/ena_upload/templates/ENA_template_samples_ERC000041.xml index 4cd3f7e..e9f333f 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000041.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000041.xml @@ -1,26 +1,37 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + @@ -65,14 +76,18 @@ def attributetest(row, column): ${row['isolation_source']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or 
sea)']} + geographic location (region and locality) @@ -143,10 +158,12 @@ def attributetest(row, column): ${row['links to additional analysis']} + isolate ${row['isolate']} + sub_species diff --git a/ena_upload/templates/ENA_template_samples_ERC000043.xml b/ena_upload/templates/ENA_template_samples_ERC000043.xml index f6348a0..22d0660 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000043.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000043.xml @@ -1,26 +1,37 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + @@ -35,14 +46,18 @@ def attributetest(row, column): ${row['collected_by']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + geographic location (latitude) @@ -135,10 +150,12 @@ def attributetest(row, column): ${row['culture_collection']} + strain ${row['strain']} + Further Details diff --git a/ena_upload/templates/ENA_template_samples_ERC000044.xml b/ena_upload/templates/ENA_template_samples_ERC000044.xml index 885f325..ce3857b 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000044.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000044.xml @@ -1,26 +1,37 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + @@ -52,18 +63,24 @@ def attributetest(row, column): ${row['country of travel']} + collected_by ${row['collected_by']} + + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + geographic location (region and locality) @@ -82,14 +99,18 @@ def attributetest(row, column): ${row['host disease outcome']} + host scientific name ${row['host scientific name']} + + isolate ${row['isolate']} + sub_type diff --git a/ena_upload/templates/ENA_template_samples_ERC000045.xml b/ena_upload/templates/ENA_template_samples_ERC000045.xml index 
6a35949..0e8a8df 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000045.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000045.xml @@ -1,45 +1,62 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + isolation_source ${row['isolation_source']} + collected_by ${row['collected_by']} + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + geographic location (region and locality) @@ -52,10 +69,12 @@ def attributetest(row, column): ${row['receipt date']} + isolate ${row['isolate']} + serotype diff --git a/ena_upload/templates/ENA_template_samples_ERC000047.xml b/ena_upload/templates/ENA_template_samples_ERC000047.xml index 71018dc..d252cc9 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000047.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000047.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -158,26 +171,32 @@ def attributetest(row, column): ${row['tRNA extraction software']} + completeness score ${row['completeness score']} % + + completeness software ${row['completeness software']} + completeness approach ${row['completeness approach']} + contamination score ${row['contamination score']} % + contamination screening input @@ -196,10 +215,12 @@ def attributetest(row, column): ${row['decontamination software']} + binning software ${row['binning software']} + reassembly post binning @@ -213,32 +234,42 @@ def attributetest(row, column): ${row['MAG coverage software']} + assembly quality ${row['assembly quality']} + + binning parameters ${row['binning parameters']} + + taxonomic identity marker ${row['taxonomic identity marker']} + taxonomic classification ${row['taxonomic classification']} + isolation_source ${row['isolation_source']} + + collection 
date ${row['collection date']} + altitude @@ -246,20 +277,26 @@ def attributetest(row, column): m + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) @@ -273,18 +310,24 @@ def attributetest(row, column): m + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + elevation @@ -317,14 +360,18 @@ def attributetest(row, column): ${row['size fraction selected']} + sample derived from ${row['sample derived from']} + + metagenomic source ${row['metagenomic source']} + sample collection device diff --git a/ena_upload/templates/ENA_template_samples_ERC000048.xml b/ena_upload/templates/ENA_template_samples_ERC000048.xml index efa3968..3871629 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000048.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000048.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -220,38 +233,48 @@ def attributetest(row, column): ${row['taxonomic classification']} + sorting technology ${row['sorting technology']} + + single cell or viral particle lysis approach ${row['single cell or viral particle lysis approach']} + single cell or viral particle lysis kit protocol ${row['single cell or viral particle lysis kit protocol']} + WGA amplification approach ${row['WGA amplification approach']} + WGA amplification kit ${row['WGA amplification kit']} + isolation_source ${row['isolation_source']} + + collection date ${row['collection date']} + altitude @@ -259,20 +282,26 @@ def attributetest(row, column): m + 
geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) @@ -286,18 +315,24 @@ def attributetest(row, column): m + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + elevation @@ -330,14 +365,18 @@ def attributetest(row, column): ${row['size fraction selected']} + sample derived from ${row['sample derived from']} + + metagenomic source ${row['metagenomic source']} + sample collection device diff --git a/ena_upload/templates/ENA_template_samples_ERC000049.xml b/ena_upload/templates/ENA_template_samples_ERC000049.xml index 15b53e1..66ae624 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000049.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000049.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -122,10 +135,12 @@ def attributetest(row, column): ${row['number of standard tRNAs extracted']} + assembly software ${row['assembly software']} + feature prediction @@ -236,30 +251,42 @@ def attributetest(row, column): ${row['WGA amplification kit']} + source of UViGs ${row['source of UViGs']} + + virus enrichment approach ${row['virus enrichment approach']} + + predicted genome type ${row['predicted genome type']} + + predicted genome structure ${row['predicted genome structure']} + + detection type ${row['detection type']} + + viral identification software ${row['viral identification software']} + OTU classification approach @@ -290,18 +317,24 @@ def attributetest(row, column): ${row['host prediction estimated accuracy']} + UViG 
assembly quality ${row['UViG assembly quality']} + + isolation_source ${row['isolation_source']} + + collection date ${row['collection date']} + altitude @@ -309,20 +342,26 @@ def attributetest(row, column): m + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + geographic location (region and locality) @@ -336,18 +375,24 @@ def attributetest(row, column): m + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + elevation @@ -380,14 +425,18 @@ def attributetest(row, column): ${row['size fraction selected']} + sample derived from ${row['sample derived from']} + + metagenomic source ${row['metagenomic source']} + sample collection device diff --git a/ena_upload/templates/ENA_template_samples_ERC000050.xml b/ena_upload/templates/ENA_template_samples_ERC000050.xml index 866ee78..0038aca 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000050.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000050.xml @@ -1,31 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + experimental factor @@ -86,10 +99,12 @@ def attributetest(row, column): ${row['adapters']} + sequencing method ${row['sequencing method']} + relevant electronic resources @@ -108,10 +123,12 @@ def attributetest(row, column): ${row['number of standard tRNAs extracted']} + assembly software ${row['assembly software']} + 16S recovered @@ -174,10 +191,12 @@ def attributetest(row, column): ${row['decontamination software']} + binning software ${row['binning software']} + reassembly post binning @@ -197,14 +216,18 @@ def attributetest(row, column): 
${row['assembly quality']} + investigation type ${row['investigation type']} + + binning parameters ${row['binning parameters']} + taxonomic identity marker @@ -217,14 +240,18 @@ def attributetest(row, column): ${row['taxonomic classification']} + isolation_source ${row['isolation_source']} + + collection date ${row['collection date']} + altitude @@ -232,20 +259,26 @@ def attributetest(row, column): m + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + geographic location (latitude) ${row['geographic location (latitude)']} DD + + geographic location (longitude) ${row['geographic location (longitude)']} DD + depth @@ -253,18 +286,24 @@ def attributetest(row, column): m + broad-scale environmental context ${row['broad-scale environmental context']} + + local environmental context ${row['local environmental context']} + + environmental medium ${row['environmental medium']} + elevation @@ -303,14 +342,18 @@ def attributetest(row, column): ${row['size fraction selected']} + sample derived from ${row['sample derived from']} + + metagenomic source ${row['metagenomic source']} + relationship to oxygen diff --git a/ena_upload/templates/ENA_template_samples_ERC000051.xml b/ena_upload/templates/ENA_template_samples_ERC000051.xml index 149e4f3..68608d9 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000051.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000051.xml @@ -1,97 +1,134 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + + sample origin ${row['sample origin']} + + sample taxon name ${row['sample taxon name']} + + sample material ${row['sample material']} + + engrafted tumor sample passage ${row['engrafted tumor sample passage']} + engrafted tumor collection site ${row['engrafted tumor collection site']} + 
patient tumor site of collection ${row['patient tumor site of collection']} + + patient tumor type ${row['patient tumor type']} + + sample unique ID ${row['sample unique ID']} + engraftment host strain name ${row['engraftment host strain name']} + patient age at collection of tumor ${row['patient age at collection of tumor']} + + patient tumor diagnosis at time of collection ${row['patient tumor diagnosis at time of collection']} + + patient tumor primary site ${row['patient tumor primary site']} + was the PDX model humanised? ${row['was the PDX model humanised?']} + patient sex ${row['patient sex']} + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000052.xml b/ena_upload/templates/ENA_template_samples_ERC000052.xml index c28d696..f5eb87f 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000052.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000052.xml @@ -1,36 +1,51 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + + project name ${row['project name']} + + sample volume or weight for DNA extraction ${row['sample volume or weight for DNA extraction']} ng + nucleic acid extraction @@ -49,22 +64,30 @@ def attributetest(row, column): ${row['adapters']} + sequencing method ${row['sequencing method']} + + reference host genome for decontamination ${row['reference host genome for decontamination']} + + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + geographic location (latitude) @@ -92,11 +115,13 @@ def attributetest(row, column): years + trial timepoint ${row['trial timepoint']} years + sample storage temperature @@ -128,22 +153,30 @@ def attributetest(row, column): ${row['host disease status']} + host common name ${row['host common name']} + + host subject id ${row['host subject id']} + + host taxid ${row['host taxid']} + + host body site ${row['host body site']} 
+ host length @@ -189,10 +222,12 @@ def attributetest(row, column): ${row['host diet']} + host diet treatment ${row['host diet treatment']} + host diet treatment concentration diff --git a/ena_upload/templates/ENA_template_samples_ERC000053.xml b/ena_upload/templates/ENA_template_samples_ERC000053.xml index 0a21e87..f2c56bf 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000053.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000053.xml @@ -1,26 +1,37 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + @@ -50,18 +61,24 @@ def attributetest(row, column): DD + organism part ${row['organism part']} + + lifestage ${row['lifestage']} + + project name ${row['project name']} + tolid @@ -74,18 +91,24 @@ def attributetest(row, column): ${row['barcoding center']} + collected_by ${row['collected_by']} + + collection date ${row['collection date']} + + geographic location (country and/or sea) ${row['geographic location (country and/or sea)']} + geographic location (latitude) @@ -100,10 +123,12 @@ def attributetest(row, column): DD + geographic location (region and locality) ${row['geographic location (region and locality)']} + identified_by @@ -124,10 +149,12 @@ def attributetest(row, column): m + habitat ${row['habitat']} + identifier_affiliation @@ -196,10 +223,12 @@ def attributetest(row, column): ${row['sample coordinator affiliation']} + sex ${row['sex']} + relationship @@ -212,10 +241,12 @@ def attributetest(row, column): ${row['symbiont']} + collecting institution ${row['collecting institution']} + GAL diff --git a/ena_upload/templates/ENA_template_studies.xml b/ena_upload/templates/ENA_template_studies.xml index 5c46625..374e743 100755 --- a/ena_upload/templates/ENA_template_studies.xml +++ b/ena_upload/templates/ENA_template_studies.xml @@ -1,22 +1,32 @@ - + + ${row.title} + + + + ${row.study_abstract} + ${row.center_project_name} diff --git 
a/ena_upload/templates/jinja_templates/ENA_template_samples.xml b/ena_upload/templates/jinja_templates/ENA_template_samples.xml index ee30272..8b8e978 100755 --- a/ena_upload/templates/jinja_templates/ENA_template_samples.xml +++ b/ena_upload/templates/jinja_templates/ENA_template_samples.xml @@ -1,38 +1,44 @@ + ${row.title} + + ${row.taxon_id} + ${row.scientific_name} ${row.common_name} + ${row.sample_description} + {%- for key, value in attributes.items() %} - {%- if value['cardinality'].lower() == 'mandatory' %} - - {{key}} - ${row['{{key}}']} - {%- if value['units'] %} - {{value['units']}} - {%- endif %} - - {%- else %} + {%- if value['cardinality'].lower() != 'mandatory' %} + {%- else %} + + {%- endif %} {{key}} ${row['{{key}}']} @@ -41,7 +47,6 @@ def attributetest(row, column): {%- endif %} - {%- endif %} {%- endfor %} SUBMISSION_TOOL diff --git a/example_read_isa_json.ipynb b/example_read_isa_json.ipynb index daaf65c..f804ff3 100644 --- a/example_read_isa_json.ipynb +++ b/example_read_isa_json.ipynb @@ -22,7 +22,7 @@ "source": [ "import json\n", "import os\n", - "from ena_objects.ena_submission import EnaSubmission\n" + "from ena_upload.json_parsing.ena_submission import EnaSubmission\n" ] }, { diff --git a/example_tables/ENA_template_samples.tsv b/example_tables/ENA_template_samples.tsv index 9b0d035..c3084d8 100644 --- a/example_tables/ENA_template_samples.tsv +++ b/example_tables/ENA_template_samples.tsv @@ -1,3 +1,3 @@ -alias title scientific_name sample_description -sample_alias_4 sample_title_1 homo sapiens sample_description_1 -sample_alias_5 sample_title_2 human metagenome sample_description_2 +alias title scientific_name sample_description collection date geographic location (country and/or sea) +sample_alias_4 sample_title_1 homo sapiens sample_description_1 2020-10-11 Argentina +sample_alias_5 sample_title_2 human metagenome sample_description_2 2008-01-24 Belgium diff --git a/receipt.xml b/receipt.xml new file mode 100644 index 0000000..c298787 
--- /dev/null +++ b/receipt.xml @@ -0,0 +1,12 @@ + + + + + + + In study, alias: "https://datahub.elixir-belgium.org/studies/27_28". The object being added already exists in the submission account with accession: "ERP152667". + This submission is a TEST submission and will be discarded within 24 hours + + ADD + HOLD + \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 39a9173..d04d837 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,8 @@ genshi==0.7.* lxml>=4.9.3, <= 5.0.0 pandas>=2.0.3 , <= 3.0.0 -pyyaml==5.* +pyyaml==5.* requests>=2.31.0 , <= 3.0.0 openpyxl>=3.1.2 , <= 4.0.0 jsonschema>=4.19.1 +pytest diff --git a/setup.py b/setup.py index 7fd09ec..9b5e2b2 100644 --- a/setup.py +++ b/setup.py @@ -30,5 +30,5 @@ python_requires='>=3.5', entry_points={ 'console_scripts': ["ena-upload-cli=ena_upload.ena_upload:main"] - }, + }, ) diff --git a/tests/test_ena_objects.py b/tests/test_ena_objects.py index 6f5a917..ed7e23b 100644 --- a/tests/test_ena_objects.py +++ b/tests/test_ena_objects.py @@ -1,13 +1,11 @@ -import os import jsonschema import pytest import json -from rich import print_json -from ena_objects.characteristic import IsaBase -from ena_objects.ena_submission import EnaSubmission, EnaSample -from ena_objects.ena_std_lib import fetch_assay_streams, study_publication_ids -from ena_objects.ena_study import EnaStudy +from ena_upload.json_parsing.characteristic import IsaBase +from ena_upload.json_parsing.ena_submission import EnaSubmission, EnaSample +from ena_upload.json_parsing.ena_std_lib import fetch_assay_streams, study_publication_ids +from ena_upload.json_parsing.ena_study import EnaStudy test_isa_jsonfile = open( "tests/test_data/multi_study_multi_assay_stream_investigation.json" diff --git a/var/xml_converter.py b/var/xml_converter.py index 500ead2..7016b4b 100644 --- a/var/xml_converter.py +++ b/var/xml_converter.py @@ -74,8 +74,8 @@ def main(): output_from_parsed_template = t.render(attributes=xml_tree) # 
Saving new xml template file - with open(f"ena_upload/templates/ENA_template_samples_{checklist}.xml", "w") as fh: - fh.write(output_from_parsed_template) + with open(f"ena_upload/templates/ENA_template_samples_{checklist}.xml", "wb") as fh: + fh.write(output_from_parsed_template.encode('utf-8')) if __name__ == "__main__": From 5e29034964d7ed0bb197f544131ab2e34578029d Mon Sep 17 00:00:00 2001 From: bedroesb Date: Fri, 20 Oct 2023 13:44:42 +0200 Subject: [PATCH 42/62] specify pytest version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index d04d837..ebb373f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,4 @@ pyyaml==5.* requests>=2.31.0 , <= 3.0.0 openpyxl>=3.1.2 , <= 4.0.0 jsonschema>=4.19.1 -pytest +pytest==7.4.* From fcd6fa7863962471229bfd8b0c01f2457eee3262 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 20 Oct 2023 14:33:29 +0200 Subject: [PATCH 43/62] Upload simple test case ISA JSON --- tests/test_data/simple_test_case.json | 1618 +++++++++++++++++++++++++ 1 file changed, 1618 insertions(+) create mode 100644 tests/test_data/simple_test_case.json diff --git a/tests/test_data/simple_test_case.json b/tests/test_data/simple_test_case.json new file mode 100644 index 0000000..acd75b7 --- /dev/null +++ b/tests/test_data/simple_test_case.json @@ -0,0 +1,1618 @@ +{ + "identifier": "", + "title": "Test Case ENA upload tool", + "description": "This investigation matches the test case of the ENA upload tool", + "submissionDate": "", + "publicReleaseDate": "", + "ontologySourceReferences": [], + "filename": "Test Case ENA upload tool.txt", + "comments": [ + { + "name": "ISAjson export time", + "value": "2023-10-20T12:28:47Z" + }, + { + "name": "SEEK Project name", + "value": "Test Project ENA upload Tool" + }, + { + "name": "SEEK Project ID", + "value": "http://localhost:3000/single_pages/16" + }, + { + "name": "SEEK Investigation ID", + "value": "27" + } + ], + 
"publications": [], + "people": [ + { + "@id": "#people/4", + "lastName": "De Pelseneer", + "firstName": "Kevin", + "midInitials": "", + "email": "kevin.depelseneer@psb.ugent.be", + "phone": "", + "fax": "", + "address": "", + "affiliation": "", + "roles": [ + { + "termAccession": "", + "termSource": "", + "annotationValue": "" + } + ], + "comments": [ + { + "@id": "", + "value": "", + "name": "" + } + ] + } + ], + "studies": [ + { + "identifier": "", + "title": "Study - Test Case ENA", + "description": "", + "submissionDate": "", + "publicReleaseDate": "", + "filename": "Study - Test Case ENA.txt", + "comments": [ + { + "@id": "#study_comment/25_13_49", + "name": "ena_sample_alias_prefix", + "value": "sample_alias_" + }, + { + "@id": "#study_comment/25_23caf0e0-5172-013c-675f-7a163e608de1", + "name": "SEEK Study ID", + "value": "25" + }, + { + "@id": "#study_comment/25_23caf880-5172-013c-6760-7a163e608de1", + "name": "SEEK creation date", + "value": "2023-10-20T11:12:23Z" + } + ], + "publications": [], + "people": [ + { + "@id": "#people/4", + "lastName": "De Pelseneer", + "firstName": "Kevin", + "midInitials": "", + "email": "kevin.depelseneer@psb.ugent.be", + "phone": "", + "fax": "", + "address": "", + "affiliation": "", + "roles": [ + { + "termAccession": "", + "termSource": "", + "annotationValue": "" + } + ], + "comments": [ + { + "@id": "", + "value": "", + "name": "" + } + ] + } + ], + "studyDesignDescriptors": [], + "characteristicCategories": [ + { + "@id": "#characteristic_category/geographic_location_(country_and/or_sea)_657", + "characteristicType": { + "annotationValue": "geographic location (country and/or sea)", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/taxon_id_658", + "characteristicType": { + "annotationValue": "taxon_id", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/title_662", + "characteristicType": { + "annotationValue": "title", + "termAccession": 
"", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/sample_description_663", + "characteristicType": { + "annotationValue": "sample_description", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/collection_date_664", + "characteristicType": { + "annotationValue": "collection date", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/accession_665", + "characteristicType": { + "annotationValue": "accession", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/submission_date_666", + "characteristicType": { + "annotationValue": "submission date", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/status_667", + "characteristicType": { + "annotationValue": "status", + "termAccession": "", + "termSource": "" + } + } + ], + "materials": { + "sources": [ + { + "@id": "#source/223", + "name": "Source 1", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/geographic_location_(country_and/or_sea)_657" + }, + "value": { + "annotationValue": "Argentina", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/taxon_id_658" + }, + "value": { + "annotationValue": "9606", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#source/226", + "name": "Source 2", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/geographic_location_(country_and/or_sea)_657" + }, + "value": { + "annotationValue": "Belgium", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/taxon_id_658" + }, + "value": { + "annotationValue": 
"646099", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + } + ], + "samples": [ + { + "@id": "#sample/224", + "name": "Sample 1", + "derivesFrom": [ + { + "@id": "#source/223" + } + ], + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_662" + }, + "value": { + "annotationValue": "sample_title_1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/sample_description_663" + }, + "value": { + "annotationValue": "sample_description_1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/collection_date_664" + }, + "value": { + "annotationValue": "2020-10-11", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_665" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_666" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_667" + }, + "value": { + "annotationValue": "add", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "factorValues": [ + { + "category": { + "@id": "" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": 
"", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/225", + "name": "Sample 2", + "derivesFrom": [ + { + "@id": "#source/226" + } + ], + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_662" + }, + "value": { + "annotationValue": "sample_title_2", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/sample_description_663" + }, + "value": { + "annotationValue": "sample_description_2", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/collection_date_664" + }, + "value": { + "annotationValue": "2008-01-24", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_665" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_666" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_667" + }, + "value": { + "annotationValue": "add", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "factorValues": [ + { + "category": { + "@id": "" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + } + ] + }, + "protocols": [ + { + "@id": "#protocol/_25", + "name": "sample collection", + 
"protocolType": { + "annotationValue": "sample collection", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + }, + { + "@id": "#protocol/_48", + "name": "library construction", + "protocolType": { + "annotationValue": "library construction", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [ + { + "@id": "#parameter/672", + "parameterName": { + "annotationValue": "design_description", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/674", + "parameterName": { + "annotationValue": "library_source", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/675", + "parameterName": { + "annotationValue": "library_strategy", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/676", + "parameterName": { + "annotationValue": "library_selection", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/677", + "parameterName": { + "annotationValue": "library_layout", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/678", + "parameterName": { + "annotationValue": "insert_size", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/679", + "parameterName": { + "annotationValue": "platform", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/680", + "parameterName": { + "annotationValue": "instrument_model", + "termAccession": "", + "termSource": "" + } + } + ], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + }, + { + "@id": "#protocol/_49", + "name": "nucleic acid sequencing", + "protocolType": { + "annotationValue": "nucleic acid sequencing", 
+ "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + } + ], + "processSequence": [ + { + "@id": "#process/sample_collection/224", + "name": "", + "executesProtocol": { + "@id": "#protocol/_25" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [ + { + "@id": "#source/223" + } + ], + "outputs": [ + { + "@id": "#sample/224" + } + ] + }, + { + "@id": "#process/sample_collection/225", + "name": "", + "executesProtocol": { + "@id": "#protocol/_25" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [ + { + "@id": "#source/226" + } + ], + "outputs": [ + { + "@id": "#sample/225" + } + ] + } + ], + "assays": [ + { + "@id": "#assay/48_49", + "filename": "a_assays_48_49.txt", + "measurementType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "technologyType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "comments": [ + { + "@id": "#assay_comment/48_49", + "name": "linked_assays", + "value": "[{\"id\":48,\"title\":\"Assay 1 - Test case ENA upload tool\"},{\"id\":49,\"title\":\"Assay 2 - Test case ENA upload tool\"}]" + } + ], + "technologyPlatform": "", + "characteristicCategories": [ + { + "@id": "#characteristic_category/title_671", + "characteristicType": { + "annotationValue": "title", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/library_name_673", + "characteristicType": { + "annotationValue": "library_name", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/accession_681", + "characteristicType": { + "annotationValue": "accession", + "termAccession": "", + "termSource": "" + } + }, 
+ { + "@id": "#characteristic_category/submission_date_682", + "characteristicType": { + "annotationValue": "submission date", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/status_683", + "characteristicType": { + "annotationValue": "status", + "termAccession": "", + "termSource": "" + } + } + ], + "materials": { + "samples": [], + "otherMaterials": [ + { + "@id": "#other_material/227", + "name": "library_construction_protocol_1", + "type": "library_construction_protocol", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_671" + }, + "value": { + "annotationValue": "experiment_title_1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/library_name_673" + }, + "value": { + "annotationValue": "library_name_1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_681" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_682" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_683" + }, + "value": { + "annotationValue": "add", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/224" + } + ] + }, + { + "@id": "#other_material/228", + "name": "library_construction_protocol_2", + "type": "library_construction_protocol", + "characteristics": [ + 
{ + "category": { + "@id": "#characteristic_category/title_671" + }, + "value": { + "annotationValue": "experiment_title_2", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/library_name_673" + }, + "value": { + "annotationValue": "library_name_2", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_681" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_682" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_683" + }, + "value": { + "annotationValue": "add", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/225" + } + ] + }, + { + "@id": "#other_material/229", + "name": "library_construction_protocol_2", + "type": "library_construction_protocol", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_671" + }, + "value": { + "annotationValue": "experiment_title_3", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/library_name_673" + }, + "value": { + "annotationValue": "library_name_2", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": 
"#characteristic_category/accession_681" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_682" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_683" + }, + "value": { + "annotationValue": "add", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/225" + } + ] + } + ] + }, + "processSequence": [ + { + "@id": "#process/library_construction/227", + "name": "", + "executesProtocol": { + "@id": "#protocol/_48" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/672" + }, + "value": { + "annotationValue": "design_description_1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/674" + }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/675" + }, + "value": { + "annotationValue": "WGA", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/676" + }, + "value": { + "annotationValue": "RANDOM", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/677" + }, + "value": { + "annotationValue": "SINGLE", + "termSource": "", + "termAccession": "" + }, + "unit": { + 
"termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/678" + }, + "value": { + "annotationValue": "250", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/679" + }, + "value": { + "annotationValue": "LS454", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/680" + }, + "value": { + "annotationValue": "454 GS 20", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/sample_collection/227" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/227" + }, + "inputs": [ + { + "@id": "#sample/224" + } + ], + "outputs": [ + { + "@id": "#other_material/227" + } + ] + }, + { + "@id": "#process/library_construction/228", + "name": "", + "executesProtocol": { + "@id": "#protocol/_48" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/672" + }, + "value": { + "annotationValue": "design_description_2", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/674" + }, + "value": { + "annotationValue": "TRANSCRIPTOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/675" + }, + "value": { + "annotationValue": "RNA-Seq", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/676" + }, + "value": { + "annotationValue": "repeat fractionation", + "termSource": "", + 
"termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/677" + }, + "value": { + "annotationValue": "SINGLE", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/678" + }, + "value": { + "annotationValue": "None", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/679" + }, + "value": { + "annotationValue": "Illumina", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/680" + }, + "value": { + "annotationValue": "Illumina Genome Analyzer", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/sample_collection/228" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/228" + }, + "inputs": [ + { + "@id": "#sample/225" + } + ], + "outputs": [ + { + "@id": "#other_material/228" + } + ] + }, + { + "@id": "#process/library_construction/229", + "name": "", + "executesProtocol": { + "@id": "#protocol/_48" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/672" + }, + "value": { + "annotationValue": "design_description_3", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/674" + }, + "value": { + "annotationValue": "TRANSCRIPTOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/675" + }, + "value": { + 
"annotationValue": "RNA-Seq", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/676" + }, + "value": { + "annotationValue": "repeat fractionation", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/677" + }, + "value": { + "annotationValue": "SINGLE", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/678" + }, + "value": { + "annotationValue": "None", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/679" + }, + "value": { + "annotationValue": "Illumina", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/680" + }, + "value": { + "annotationValue": "Illumina Genome Analyzer", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/sample_collection/229" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/229" + }, + "inputs": [ + { + "@id": "#sample/225" + } + ], + "outputs": [ + { + "@id": "#other_material/229" + } + ] + }, + { + "@id": "#process/nucleic_acid_sequencing/233", + "name": "", + "executesProtocol": { + "@id": "#protocol/_49" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/library_construction/233" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/227" + } + ], + "outputs": [ + { + "@id": "#data_file/233" + } + ] + }, + { + "@id": 
"#process/nucleic_acid_sequencing/234", + "name": "", + "executesProtocol": { + "@id": "#protocol/_49" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/library_construction/234" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/228" + } + ], + "outputs": [ + { + "@id": "#data_file/234" + } + ] + }, + { + "@id": "#process/nucleic_acid_sequencing/235", + "name": "", + "executesProtocol": { + "@id": "#protocol/_49" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/library_construction/235" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/229" + } + ], + "outputs": [ + { + "@id": "#data_file/235" + } + ] + } + ], + "dataFiles": [ + { + "@id": "#data_file/233", + "name": "RD file 1", + "type": "Raw Data File", + "comments": [ + { + "name": "file_name", + "value": "ENA_TEST2.R1.fastq.gz" + }, + { + "name": "file_type", + "value": "fastq" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "accession", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "add" + } + ] + }, + { + "@id": "#data_file/234", + "name": "RD file 2", + "type": "Raw Data File", + "comments": [ + { + "name": "file_name", + "value": "ENA_TEST2.R2.fastq.gz" + }, + { + "name": "file_type", + "value": "fastq" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "accession", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "add" + } + ] + }, + { + "@id": "#data_file/235", + "name": "RD file 3", + "type": "Raw Data File", + "comments": [ + { + "name": "file_name", + "value": "ENA_TEST1.R1.fastq.gz" + }, + { + "name": "file_type", + "value": "fastq" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "accession", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": 
"status", + "value": "add" + } + ] + } + ], + "unitCategories": [] + } + ], + "factors": [], + "unitCategories": [] + } + ] +} \ No newline at end of file From 13ac7d25f74cee692fd1888b6e7e363323936e44 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 20 Oct 2023 14:43:34 +0200 Subject: [PATCH 44/62] Fix simple test case --- example_read_isa_json.ipynb | 1466 ++++++++++--------------- tests/test_data/simple_test_case.json | 48 +- 2 files changed, 595 insertions(+), 919 deletions(-) diff --git a/example_read_isa_json.ipynb b/example_read_isa_json.ipynb index f804ff3..871ac2f 100644 --- a/example_read_isa_json.ipynb +++ b/example_read_isa_json.ipynb @@ -1,918 +1,554 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Example Script for parsing an ISA JSON" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Import statements" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import os\n", - "from ena_upload.json_parsing.ena_submission import EnaSubmission\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Reading a JSON file" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "# Read json file\n", - "isa_json_file = open(\n", - " \"tests/test_data/multi_study_multi_assay_stream_investigation.json\"\n", - ")\n", - "isa_json = json.load(isa_json_file)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Setting some extra parameters" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "\n", - "# Change this to 'True' if you want to export the resulting DataFrames to an xlsx.\n", - "export_to_excel = False\n", - "outputfolder = \"./output_folder/\"\n", - "\n", - "required_assays = [\n", - " {\"assay_stream\": \"Ena stream 1\"},\n", - 
" {\"ena_study_title\": \"Ena Study 2\"},\n", - " {\"ena_study_title\": \"Ena Study 3\"},\n", - "]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Parsing" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "\n", - "submission = EnaSubmission.from_isa_json(isa_json, required_assays)\n", - "submission_dfs = submission.generate_dataframes()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Output" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dataframe study:\n" - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example Script for parsing an ISA JSON" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import statements" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import os\n", + "from ena_upload.json_parsing.ena_submission import EnaSubmission\n", + "from dotenv import dotenv_values\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Reading a JSON file" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# Read json file\n", + "isa_json_file = open(\n", + " \"tests/test_data/simple_test_case.json\"\n", + ")\n", + "isa_json = json.load(isa_json_file)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setting some extra parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "# Change this to 'True' if you want to export the resulting DataFrames to an xlsx.\n", + "export_to_excel = False\n", + "outputfolder = \"./output_folder/\"\n", + "\n", + "required_assays = [\n", + " 
{\"assay_stream\": \"Ena stream 1\"},\n", + " {\"ena_study_title\": \"Ena Study 2\"},\n", + " {\"ena_study_title\": \"Ena Study 3\"},\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Parsing" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "submission = EnaSubmission.from_isa_json(isa_json, required_assays)\n", + "submission_dfs = submission.generate_dataframes()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Output" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dataframe study:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
aliastitlestudy_typestudy_abstractnew_study_typepubmed_id
0study_alias_48_49study_title_1Transcriptome Analysisstudy_abstract_1None
\n", + "
" + ], + "text/plain": [ + " alias title study_type study_abstract \\\n", + "0 study_alias_48_49 study_title_1 Transcriptome Analysis study_abstract_1 \n", + "\n", + " new_study_type pubmed_id \n", + "0 None " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dataframe sample:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
aliastitlesample_descriptioncollection dateaccessionsubmission datestatusgeographic location (country and/or sea)taxon_id
0sample_alias_224sample_title_1sample_description_12020-10-11addArgentina9606
1sample_alias_225sample_title_2sample_description_22008-01-24addBelgium646099
\n", + "
" + ], + "text/plain": [ + " alias title sample_description collection date \\\n", + "0 sample_alias_224 sample_title_1 sample_description_1 2020-10-11 \n", + "1 sample_alias_225 sample_title_2 sample_description_2 2008-01-24 \n", + "\n", + " accession submission date status geographic location (country and/or sea) \\\n", + "0 add Argentina \n", + "1 add Belgium \n", + "\n", + " taxon_id \n", + "0 9606 \n", + "1 646099 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dataframe experiment:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
aliasstudy_aliassample_aliaslibrary_nametitleaccessionsubmission datestatusdesign_descriptionlibrary_sourcelibrary_strategylibrary_selectionlibrary_layoutinsert_sizeplatforminstrument_model
0experiment_alias_227study_alias_48_49sample_alias_224library_name_1experiment_title_1adddesign_description_1GENOMICWGARANDOMSINGLE250LS454454 GS 20
1experiment_alias_228study_alias_48_49sample_alias_225library_name_2experiment_title_2adddesign_description_2TRANSCRIPTOMICRNA-Seqrepeat fractionationSINGLENoneIlluminaIllumina Genome Analyzer
2experiment_alias_229study_alias_48_49sample_alias_225library_name_2experiment_title_3adddesign_description_3TRANSCRIPTOMICRNA-Seqrepeat fractionationSINGLENoneIlluminaIllumina Genome Analyzer
\n", + "
" + ], + "text/plain": [ + " alias study_alias sample_alias library_name \\\n", + "0 experiment_alias_227 study_alias_48_49 sample_alias_224 library_name_1 \n", + "1 experiment_alias_228 study_alias_48_49 sample_alias_225 library_name_2 \n", + "2 experiment_alias_229 study_alias_48_49 sample_alias_225 library_name_2 \n", + "\n", + " title accession submission date status design_description \\\n", + "0 experiment_title_1 add design_description_1 \n", + "1 experiment_title_2 add design_description_2 \n", + "2 experiment_title_3 add design_description_3 \n", + "\n", + " library_source library_strategy library_selection library_layout \\\n", + "0 GENOMIC WGA RANDOM SINGLE \n", + "1 TRANSCRIPTOMIC RNA-Seq repeat fractionation SINGLE \n", + "2 TRANSCRIPTOMIC RNA-Seq repeat fractionation SINGLE \n", + "\n", + " insert_size platform instrument_model \n", + "0 250 LS454 454 GS 20 \n", + "1 None Illumina Illumina Genome Analyzer \n", + "2 None Illumina Illumina Genome Analyzer " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dataframe run:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
aliasexperiment_aliasfile_namefile_typefile checksumaccessionsubmission datestatus
0run_alias_233ena_run_alias_prefix227ENA_TEST2.R1.fastq.gzfastqadd
1run_alias_234ena_run_alias_prefix228ENA_TEST2.R2.fastq.gzfastqadd
2run_alias_235ena_run_alias_prefix229ENA_TEST1.R1.fastq.gzfastqadd
\n", + "
" + ], + "text/plain": [ + " alias experiment_alias file_name file_type \\\n", + "0 run_alias_233 ena_run_alias_prefix227 ENA_TEST2.R1.fastq.gz fastq \n", + "1 run_alias_234 ena_run_alias_prefix228 ENA_TEST2.R2.fastq.gz fastq \n", + "2 run_alias_235 ena_run_alias_prefix229 ENA_TEST1.R1.fastq.gz fastq \n", + "\n", + " file checksum accession submission date status \n", + "0 add \n", + "1 add \n", + "2 add " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Done!\n" + ] + } + ], + "source": [ + "\n", + "if (not os.path.exists(outputfolder)) and export_to_excel:\n", + " os.makedirs(outputfolder)\n", + "\n", + "for k, df in submission_dfs.items():\n", + " print(f\"Dataframe {k}:\")\n", + " display(df)\n", + " if export_to_excel:\n", + " df.to_excel(f\"{outputfolder}{k}.xlsx\")\n", + "\n", + "print(\"Done!\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Automated pipeline for DataHub" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "config = dotenv_values(\".env\")\n", + "datahub_token = config[\"DATAHUB_API_TOKEN\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "\n", + "url = \"https://datahub-dev.elixir-belgium.org/single_pages/16/export_isa\"\n", + "url = \"http://localhost:3000/single_pages/2/export_isa\"\n", + "data = {\"key\": \"value\"}\n", + "headers = {\"Content-Type\": \"application/json\"}\n", + "\n", + "response = requests.post(url, json=data, headers=headers)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + 
"pygments_lexer": "ipython3", + "version": "3.10.12" + } }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
aliastitlestudy_typestudy_abstractnew_study_typepubmed_id
0https://datahub.elixir-belgium.org/studies/27_28Ena Study 1Whole Genome SequencingThis is Ena Study 1.None5,6
1https://datahub.elixir-belgium.org/studies/29_30Ena Study 2OtherThis is Ena Study 2.My special study type5,6
2https://datahub.elixir-belgium.org/studies/31_32Ena Study 3OtherThis is Ena Study 3.My other special study type7
\n", - "
" - ], - "text/plain": [ - " alias title \\\n", - "0 https://datahub.elixir-belgium.org/studies/27_28 Ena Study 1 \n", - "1 https://datahub.elixir-belgium.org/studies/29_30 Ena Study 2 \n", - "2 https://datahub.elixir-belgium.org/studies/31_32 Ena Study 3 \n", - "\n", - " study_type study_abstract new_study_type \\\n", - "0 Whole Genome Sequencing This is Ena Study 1. None \n", - "1 Other This is Ena Study 2. My special study type \n", - "2 Other This is Ena Study 3. My other special study type \n", - "\n", - " pubmed_id \n", - "0 5,6 \n", - "1 5,6 \n", - "2 7 " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dataframe samples:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
aliastitlesample_descriptioncollection dateaccessionsubmission datestatusgeographic location (country and/or sea)taxon_id
0https://datahub.elixir-belgium.org/samples/142Sample title 1Sample description 12023Afghanistan1234
1https://datahub.elixir-belgium.org/samples/143Sample title 2Sample description 22022Afghanistan1234
2https://datahub.elixir-belgium.org/samples/144Sample title 3Sample description 32021Albania2345
3https://datahub.elixir-belgium.org/samples/145Sample title 4Sample description 42020Albania2345
4https://datahub.elixir-belgium.org/samples/164Sample title 5Sample description 52019Afghanistan9876
5https://datahub.elixir-belgium.org/samples/165Sample title 6Sample description 62018Albania8765
\n", - "
" - ], - "text/plain": [ - " alias title \\\n", - "0 https://datahub.elixir-belgium.org/samples/142 Sample title 1 \n", - "1 https://datahub.elixir-belgium.org/samples/143 Sample title 2 \n", - "2 https://datahub.elixir-belgium.org/samples/144 Sample title 3 \n", - "3 https://datahub.elixir-belgium.org/samples/145 Sample title 4 \n", - "4 https://datahub.elixir-belgium.org/samples/164 Sample title 5 \n", - "5 https://datahub.elixir-belgium.org/samples/165 Sample title 6 \n", - "\n", - " sample_description collection date accession submission date status \\\n", - "0 Sample description 1 2023 \n", - "1 Sample description 2 2022 \n", - "2 Sample description 3 2021 \n", - "3 Sample description 4 2020 \n", - "4 Sample description 5 2019 \n", - "5 Sample description 6 2018 \n", - "\n", - " geographic location (country and/or sea) taxon_id \n", - "0 Afghanistan 1234 \n", - "1 Afghanistan 1234 \n", - "2 Albania 2345 \n", - "3 Albania 2345 \n", - "4 Afghanistan 9876 \n", - "5 Albania 8765 " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dataframe experiments:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - "
aliasstudy_aliassample_aliaslibrary_nametitleaccessionsubmission datestatuslibrary_construction_protocoldesign_descriptionlibrary_sourcelibrary_strategylibrary_selectionlibrary_layoutinsert_sizeplatforminstrument_model
0https://datahub.elixir-belgium.org/samples/146https://datahub.elixir-belgium.org/studies/27_28https://datahub.elixir-belgium.org/samples/142Library 1Library title 1My special protocol 1Library description 1GENOMICWGSRANDOMSINGLE123LS454454 GS
1https://datahub.elixir-belgium.org/samples/147https://datahub.elixir-belgium.org/studies/27_28https://datahub.elixir-belgium.org/samples/143Library 2Library title 2My special protocol 2Library description 2GENOMIC SINGLE CELLWGAPCRPAIRED234IlluminaIllumina Genome Analyzer
2https://datahub.elixir-belgium.org/samples/148https://datahub.elixir-belgium.org/studies/27_28https://datahub.elixir-belgium.org/samples/144Library 3Library title 3My special protocol 3Library description 3TRANSCRIPTOMICWXSRANDOM PCRSINGLE345PacBioPacBio RS
3https://datahub.elixir-belgium.org/samples/149https://datahub.elixir-belgium.org/studies/27_28https://datahub.elixir-belgium.org/samples/145Library 4Library title 4My special protocol 4Library description 4TRANSCRIPTOMIC SINGLE CELLRNA-SeqRT-PCRPAIRED456Themo Fisher ScientificAB 3730xL Genetic Analyzer
4https://datahub.elixir-belgium.org/samples/154https://datahub.elixir-belgium.org/studies/29_30https://datahub.elixir-belgium.org/samples/142Library 5Library title 5My library construction protocol 5Library design description 5GENOMICssRNA-seqHMPRSINGLE123LS454454 GS 20
5https://datahub.elixir-belgium.org/samples/155https://datahub.elixir-belgium.org/studies/29_30https://datahub.elixir-belgium.org/samples/143Library 6Library title 6My library construction protocol 6Library design description 6GENOMIC SINGLE CELLmiRNA-SeqMFPAIRED234IlluminaIllumina Genome Analyzer II
6https://datahub.elixir-belgium.org/samples/156https://datahub.elixir-belgium.org/studies/29_30https://datahub.elixir-belgium.org/samples/144Library 7Library title 7My library construction protocol 7Library design description 7TRANSCRIPTOMICncRNA-Seqsize fractionationSINGLE345PacBioPacBio RS II
7https://datahub.elixir-belgium.org/samples/157https://datahub.elixir-belgium.org/studies/29_30https://datahub.elixir-belgium.org/samples/145Library 8Library title 8My library construction protocol 8Library design description 8SYNTHETICFL-cDNArepeat fractionationPAIRED456Themo Fisher ScientificAB 3730 Genetic Analyzer
8https://datahub.elixir-belgium.org/samples/166https://datahub.elixir-belgium.org/studies/31_32https://datahub.elixir-belgium.org/samples/164Library 9Library title 9My library construction protocol 9Library design description 9GENOMICESTMNaseSINGLE987Themo Fisher ScientificAB 3500xL Genetic Analyzer
9https://datahub.elixir-belgium.org/samples/167https://datahub.elixir-belgium.org/studies/31_32https://datahub.elixir-belgium.org/samples/165Library 10Library title 10My library construction protocol 10Library design description 10SYNTHETICHi-COligo-dTPAIRED876LS454454 GS FLX
\n", - "
" - ], - "text/plain": [ - " alias \\\n", - "0 https://datahub.elixir-belgium.org/samples/146 \n", - "1 https://datahub.elixir-belgium.org/samples/147 \n", - "2 https://datahub.elixir-belgium.org/samples/148 \n", - "3 https://datahub.elixir-belgium.org/samples/149 \n", - "4 https://datahub.elixir-belgium.org/samples/154 \n", - "5 https://datahub.elixir-belgium.org/samples/155 \n", - "6 https://datahub.elixir-belgium.org/samples/156 \n", - "7 https://datahub.elixir-belgium.org/samples/157 \n", - "8 https://datahub.elixir-belgium.org/samples/166 \n", - "9 https://datahub.elixir-belgium.org/samples/167 \n", - "\n", - " study_alias \\\n", - "0 https://datahub.elixir-belgium.org/studies/27_28 \n", - "1 https://datahub.elixir-belgium.org/studies/27_28 \n", - "2 https://datahub.elixir-belgium.org/studies/27_28 \n", - "3 https://datahub.elixir-belgium.org/studies/27_28 \n", - "4 https://datahub.elixir-belgium.org/studies/29_30 \n", - "5 https://datahub.elixir-belgium.org/studies/29_30 \n", - "6 https://datahub.elixir-belgium.org/studies/29_30 \n", - "7 https://datahub.elixir-belgium.org/studies/29_30 \n", - "8 https://datahub.elixir-belgium.org/studies/31_32 \n", - "9 https://datahub.elixir-belgium.org/studies/31_32 \n", - "\n", - " sample_alias library_name \\\n", - "0 https://datahub.elixir-belgium.org/samples/142 Library 1 \n", - "1 https://datahub.elixir-belgium.org/samples/143 Library 2 \n", - "2 https://datahub.elixir-belgium.org/samples/144 Library 3 \n", - "3 https://datahub.elixir-belgium.org/samples/145 Library 4 \n", - "4 https://datahub.elixir-belgium.org/samples/142 Library 5 \n", - "5 https://datahub.elixir-belgium.org/samples/143 Library 6 \n", - "6 https://datahub.elixir-belgium.org/samples/144 Library 7 \n", - "7 https://datahub.elixir-belgium.org/samples/145 Library 8 \n", - "8 https://datahub.elixir-belgium.org/samples/164 Library 9 \n", - "9 https://datahub.elixir-belgium.org/samples/165 Library 10 \n", - "\n", - " title accession submission date status 
\\\n", - "0 Library title 1 \n", - "1 Library title 2 \n", - "2 Library title 3 \n", - "3 Library title 4 \n", - "4 Library title 5 \n", - "5 Library title 6 \n", - "6 Library title 7 \n", - "7 Library title 8 \n", - "8 Library title 9 \n", - "9 Library title 10 \n", - "\n", - " library_construction_protocol design_description \\\n", - "0 My special protocol 1 Library description 1 \n", - "1 My special protocol 2 Library description 2 \n", - "2 My special protocol 3 Library description 3 \n", - "3 My special protocol 4 Library description 4 \n", - "4 My library construction protocol 5 Library design description 5 \n", - "5 My library construction protocol 6 Library design description 6 \n", - "6 My library construction protocol 7 Library design description 7 \n", - "7 My library construction protocol 8 Library design description 8 \n", - "8 My library construction protocol 9 Library design description 9 \n", - "9 My library construction protocol 10 Library design description 10 \n", - "\n", - " library_source library_strategy library_selection \\\n", - "0 GENOMIC WGS RANDOM \n", - "1 GENOMIC SINGLE CELL WGA PCR \n", - "2 TRANSCRIPTOMIC WXS RANDOM PCR \n", - "3 TRANSCRIPTOMIC SINGLE CELL RNA-Seq RT-PCR \n", - "4 GENOMIC ssRNA-seq HMPR \n", - "5 GENOMIC SINGLE CELL miRNA-Seq MF \n", - "6 TRANSCRIPTOMIC ncRNA-Seq size fractionation \n", - "7 SYNTHETIC FL-cDNA repeat fractionation \n", - "8 GENOMIC EST MNase \n", - "9 SYNTHETIC Hi-C Oligo-dT \n", - "\n", - " library_layout insert_size platform \\\n", - "0 SINGLE 123 LS454 \n", - "1 PAIRED 234 Illumina \n", - "2 SINGLE 345 PacBio \n", - "3 PAIRED 456 Themo Fisher Scientific \n", - "4 SINGLE 123 LS454 \n", - "5 PAIRED 234 Illumina \n", - "6 SINGLE 345 PacBio \n", - "7 PAIRED 456 Themo Fisher Scientific \n", - "8 SINGLE 987 Themo Fisher Scientific \n", - "9 PAIRED 876 LS454 \n", - "\n", - " instrument_model \n", - "0 454 GS \n", - "1 Illumina Genome Analyzer \n", - "2 PacBio RS \n", - "3 AB 3730xL Genetic Analyzer \n", - 
"4 454 GS 20 \n", - "5 Illumina Genome Analyzer II \n", - "6 PacBio RS II \n", - "7 AB 3730 Genetic Analyzer \n", - "8 AB 3500xL Genetic Analyzer \n", - "9 454 GS FLX " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dataframe runs:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
aliasexperiment_aliasfile_namefile_typefile checksumaccessionsubmission datestatus
0https://datahub.elixir-belgium.org/samples/150ena_run_alias_prefix146data_file_1.bambam
1https://datahub.elixir-belgium.org/samples/151ena_run_alias_prefix147data_file_2.cramcram
2https://datahub.elixir-belgium.org/samples/152ena_run_alias_prefix148data_file_3.fastqfastq
3https://datahub.elixir-belgium.org/samples/153ena_run_alias_prefix149data_file_4.sffsff
4https://datahub.elixir-belgium.org/samples/158ena_run_alias_prefix154data file 5.bambam
5https://datahub.elixir-belgium.org/samples/159ena_run_alias_prefix155data file 6.cramcram
6https://datahub.elixir-belgium.org/samples/160ena_run_alias_prefix156data file 7.fastqfastq
7https://datahub.elixir-belgium.org/samples/161ena_run_alias_prefix157data file 8.sffsff
8https://datahub.elixir-belgium.org/samples/168ena_run_alias_prefix166data file 9.sffsff
9https://datahub.elixir-belgium.org/samples/169ena_run_alias_prefix167data file 10.fastqfastq
\n", - "
" - ], - "text/plain": [ - " alias experiment_alias \\\n", - "0 https://datahub.elixir-belgium.org/samples/150 ena_run_alias_prefix146 \n", - "1 https://datahub.elixir-belgium.org/samples/151 ena_run_alias_prefix147 \n", - "2 https://datahub.elixir-belgium.org/samples/152 ena_run_alias_prefix148 \n", - "3 https://datahub.elixir-belgium.org/samples/153 ena_run_alias_prefix149 \n", - "4 https://datahub.elixir-belgium.org/samples/158 ena_run_alias_prefix154 \n", - "5 https://datahub.elixir-belgium.org/samples/159 ena_run_alias_prefix155 \n", - "6 https://datahub.elixir-belgium.org/samples/160 ena_run_alias_prefix156 \n", - "7 https://datahub.elixir-belgium.org/samples/161 ena_run_alias_prefix157 \n", - "8 https://datahub.elixir-belgium.org/samples/168 ena_run_alias_prefix166 \n", - "9 https://datahub.elixir-belgium.org/samples/169 ena_run_alias_prefix167 \n", - "\n", - " file_name file_type file checksum accession submission date status \n", - "0 data_file_1.bam bam \n", - "1 data_file_2.cram cram \n", - "2 data_file_3.fastq fastq \n", - "3 data_file_4.sff sff \n", - "4 data file 5.bam bam \n", - "5 data file 6.cram cram \n", - "6 data file 7.fastq fastq \n", - "7 data file 8.sff sff \n", - "8 data file 9.sff sff \n", - "9 data file 10.fastq fastq " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Done!\n" - ] - } - ], - "source": [ - "\n", - "if (not os.path.exists(outputfolder)) and export_to_excel:\n", - " os.makedirs(outputfolder)\n", - "\n", - "for k, df in submission_dfs.items():\n", - " print(f\"Dataframe {k}:\")\n", - " display(df)\n", - " if export_to_excel:\n", - " df.to_excel(f\"{outputfolder}{k}.xlsx\")\n", - "\n", - "print(\"Done!\")\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - 
"mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/tests/test_data/simple_test_case.json b/tests/test_data/simple_test_case.json index acd75b7..52926a8 100644 --- a/tests/test_data/simple_test_case.json +++ b/tests/test_data/simple_test_case.json @@ -9,7 +9,7 @@ "comments": [ { "name": "ISAjson export time", - "value": "2023-10-20T12:28:47Z" + "value": "2023-10-20T12:42:06Z" }, { "name": "SEEK Project name", @@ -67,12 +67,12 @@ "value": "sample_alias_" }, { - "@id": "#study_comment/25_23caf0e0-5172-013c-675f-7a163e608de1", + "@id": "#study_comment/25_0051dc80-5174-013c-6763-7a163e608de1", "name": "SEEK Study ID", "value": "25" }, { - "@id": "#study_comment/25_23caf880-5172-013c-6760-7a163e608de1", + "@id": "#study_comment/25_0051e690-5174-013c-6764-7a163e608de1", "name": "SEEK creation date", "value": "2023-10-20T11:12:23Z" } @@ -671,7 +671,7 @@ "assays": [ { "@id": "#assay/48_49", - "filename": "a_assays_48_49.txt", + "filename": "a_ena_stream_1.txt", "measurementType": { "annotationValue": "", "termSource": "", @@ -683,6 +683,46 @@ "termAccession": "" }, "comments": [ + { + "@id": "#assay_comment/25_48_49_14_50", + "name": "ena_study_title", + "value": "study_title_1" + }, + { + "@id": "#assay_comment/25_48_49_14_51", + "name": "study_type", + "value": "Transcriptome Analysis" + }, + { + "@id": "#assay_comment/25_48_49_14_52", + "name": "new_study_type", + "value": "This should not be submitted!" 
+ }, + { + "@id": "#assay_comment/25_48_49_14_53", + "name": "ena_study_abstract", + "value": "study_abstract_1" + }, + { + "@id": "#assay_comment/25_48_49_14_54", + "name": "assay_stream", + "value": "Ena stream 1" + }, + { + "@id": "#assay_comment/25_48_49_14_55", + "name": "ena_study_alias_prefix", + "value": "study_alias_" + }, + { + "@id": "#assay_comment/25_48_49_14_56", + "name": "ena_experiment_alias_prefix", + "value": "experiment_alias_" + }, + { + "@id": "#assay_comment/25_48_49_14_57", + "name": "ena_run_alias_prefix", + "value": "run_alias_" + }, { "@id": "#assay_comment/48_49", "name": "linked_assays", From 25462ed2d7e64aa27c8a8feb7d5bf4257f44c95b Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 20 Oct 2023 17:36:13 +0200 Subject: [PATCH 45/62] Fix experiment alias in runs table --- ena_upload/json_parsing/ena_run.py | 13 +++++++++---- example_read_isa_json.ipynb | 30 ++++++++++++++---------------- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/ena_upload/json_parsing/ena_run.py b/ena_upload/json_parsing/ena_run.py index d59e763..c8cf50d 100644 --- a/ena_upload/json_parsing/ena_run.py +++ b/ena_upload/json_parsing/ena_run.py @@ -3,6 +3,7 @@ from pandas import DataFrame from ena_upload.json_parsing.characteristic import IsaBase +from ena_upload.json_parsing.ena_experiment import EnaExperiment from ena_upload.json_parsing.ena_std_lib import ( fetch_assay_comment_by_name, get_assay_sample_associations, @@ -123,7 +124,7 @@ def get_derived_expertiment_id( return association["input"][0] -def fetch_experiment_alias(data_file: DataFile) -> str: +def fetch_experiment_alias(data_file: DataFile, prefix: str) -> str: """Generates the experiment alias from the information in the provided data file. 
Args: @@ -132,7 +133,7 @@ def fetch_experiment_alias(data_file: DataFile) -> str: Returns: str: associated experiment alias """ - return EnaRun.prefix + clip_off_prefix(data_file.derived_experiment_id) + return prefix + clip_off_prefix(data_file.derived_experiment_id) class EnaRun(IsaBase): @@ -167,7 +168,9 @@ def from_assay_stream(self, assay_stream: Dict[str, str]) -> None: sample_datafile_associations = get_assay_sample_associations(assay_stream) prefix = fetch_assay_comment_by_name(assay_stream, EnaRun.prefix)["value"] - + ena_experiment_prefix = fetch_assay_comment_by_name( + assay_stream, EnaExperiment.prefix + )["value"] for data_file in assay_stream["dataFiles"]: current_data_file = DataFile.from_data_file_dict( data_file, sample_datafile_associations @@ -175,7 +178,9 @@ def from_assay_stream(self, assay_stream: Dict[str, str]) -> None: ena_runs.append( EnaRun( alias=run_alias(data_file, prefix), - experiment_alias=fetch_experiment_alias(current_data_file), + experiment_alias=fetch_experiment_alias( + current_data_file, ena_experiment_prefix + ), data_file=current_data_file, ) ) diff --git a/example_read_isa_json.ipynb b/example_read_isa_json.ipynb index 871ac2f..929379c 100644 --- a/example_read_isa_json.ipynb +++ b/example_read_isa_json.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -35,7 +35,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -56,7 +56,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -68,9 +68,7 @@ "\n", "required_assays = [\n", " {\"assay_stream\": \"Ena stream 1\"},\n", - " {\"ena_study_title\": \"Ena Study 2\"},\n", - " {\"ena_study_title\": \"Ena Study 3\"},\n", - "]" + " ]" ] }, { @@ -82,7 +80,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 4, "metadata": {}, 
"outputs": [], "source": [ @@ -101,7 +99,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -425,7 +423,7 @@ " \n", " 0\n", " run_alias_233\n", - " ena_run_alias_prefix227\n", + " experiment_alias_227\n", " ENA_TEST2.R1.fastq.gz\n", " fastq\n", " \n", @@ -436,7 +434,7 @@ " \n", " 1\n", " run_alias_234\n", - " ena_run_alias_prefix228\n", + " experiment_alias_228\n", " ENA_TEST2.R2.fastq.gz\n", " fastq\n", " \n", @@ -447,7 +445,7 @@ " \n", " 2\n", " run_alias_235\n", - " ena_run_alias_prefix229\n", + " experiment_alias_229\n", " ENA_TEST1.R1.fastq.gz\n", " fastq\n", " \n", @@ -460,10 +458,10 @@ "" ], "text/plain": [ - " alias experiment_alias file_name file_type \\\n", - "0 run_alias_233 ena_run_alias_prefix227 ENA_TEST2.R1.fastq.gz fastq \n", - "1 run_alias_234 ena_run_alias_prefix228 ENA_TEST2.R2.fastq.gz fastq \n", - "2 run_alias_235 ena_run_alias_prefix229 ENA_TEST1.R1.fastq.gz fastq \n", + " alias experiment_alias file_name file_type \\\n", + "0 run_alias_233 experiment_alias_227 ENA_TEST2.R1.fastq.gz fastq \n", + "1 run_alias_234 experiment_alias_228 ENA_TEST2.R2.fastq.gz fastq \n", + "2 run_alias_235 experiment_alias_229 ENA_TEST1.R1.fastq.gz fastq \n", "\n", " file checksum accession submission date status \n", "0 add \n", @@ -505,7 +503,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ From 298d4eb8919920b01ed0a20af2b5ee5854e85e60 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Wed, 8 Nov 2023 18:30:54 +0100 Subject: [PATCH 46/62] Make run alias the process id instead of sample id for sequencing data with paired library layout --- ena_upload/json_parsing/ena_run.py | 18 +- example_read_isa_json.ipynb | 26 +- tests/test_data/simple_test_case.json | 1658 ---------------------- tests/test_data/simple_test_case_v2.json | 1631 +++++++++++++++++++++ 4 files changed, 1659 insertions(+), 1674 deletions(-) delete mode 
100644 tests/test_data/simple_test_case.json create mode 100644 tests/test_data/simple_test_case_v2.json diff --git a/ena_upload/json_parsing/ena_run.py b/ena_upload/json_parsing/ena_run.py index c8cf50d..7341dd2 100644 --- a/ena_upload/json_parsing/ena_run.py +++ b/ena_upload/json_parsing/ena_run.py @@ -93,7 +93,16 @@ def to_dict(self) -> Dict[str, str]: } -def run_alias(data_file: Dict[str, str], prefix: str) -> str: +def get_involved_process_id(id: str, process_sequence: List[Dict[str, str]]): + for process in process_sequence: + output_ids = [output["@id"] for output in process["outputs"]] + if id in output_ids: + return process["@id"] + + +def run_alias( + data_file: Dict[str, str], process_sequence: List[Dict[str, str]], prefix: str +) -> str: """Generates an alias for the run, based on the data file dictionary and prefix specified in the Class @@ -104,7 +113,10 @@ def run_alias(data_file: Dict[str, str], prefix: str) -> str: Returns: str: Resulting alias """ - return prefix + clip_off_prefix(data_file["@id"]) + + data_file_id = data_file["@id"] + process_id = get_involved_process_id(data_file_id, process_sequence) + return prefix + clip_off_prefix(process_id) def get_derived_expertiment_id( @@ -177,7 +189,7 @@ def from_assay_stream(self, assay_stream: Dict[str, str]) -> None: ) ena_runs.append( EnaRun( - alias=run_alias(data_file, prefix), + alias=run_alias(data_file, assay_stream["processSequence"], prefix), experiment_alias=fetch_experiment_alias( current_data_file, ena_experiment_prefix ), diff --git a/example_read_isa_json.ipynb b/example_read_isa_json.ipynb index 929379c..0ffbaed 100644 --- a/example_read_isa_json.ipynb +++ b/example_read_isa_json.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -35,14 +35,14 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "\n", "# Read json file\n", 
"isa_json_file = open(\n", - " \"tests/test_data/simple_test_case.json\"\n", + " \"tests/test_data/simple_test_case_v2.json\"\n", ")\n", "isa_json = json.load(isa_json_file)" ] @@ -56,7 +56,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -80,7 +80,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -99,7 +99,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -422,7 +422,7 @@ " \n", " \n", " 0\n", - " run_alias_233\n", + " run_alias_233_234\n", " experiment_alias_227\n", " ENA_TEST2.R1.fastq.gz\n", " fastq\n", @@ -433,8 +433,8 @@ " \n", " \n", " 1\n", - " run_alias_234\n", - " experiment_alias_228\n", + " run_alias_233_234\n", + " experiment_alias_227\n", " ENA_TEST2.R2.fastq.gz\n", " fastq\n", " \n", @@ -458,10 +458,10 @@ "" ], "text/plain": [ - " alias experiment_alias file_name file_type \\\n", - "0 run_alias_233 experiment_alias_227 ENA_TEST2.R1.fastq.gz fastq \n", - "1 run_alias_234 experiment_alias_228 ENA_TEST2.R2.fastq.gz fastq \n", - "2 run_alias_235 experiment_alias_229 ENA_TEST1.R1.fastq.gz fastq \n", + " alias experiment_alias file_name file_type \\\n", + "0 run_alias_233_234 experiment_alias_227 ENA_TEST2.R1.fastq.gz fastq \n", + "1 run_alias_233_234 experiment_alias_227 ENA_TEST2.R2.fastq.gz fastq \n", + "2 run_alias_235 experiment_alias_229 ENA_TEST1.R1.fastq.gz fastq \n", "\n", " file checksum accession submission date status \n", "0 add \n", diff --git a/tests/test_data/simple_test_case.json b/tests/test_data/simple_test_case.json deleted file mode 100644 index 52926a8..0000000 --- a/tests/test_data/simple_test_case.json +++ /dev/null @@ -1,1658 +0,0 @@ -{ - "identifier": "", - "title": "Test Case ENA upload tool", - "description": "This investigation matches the test case of the ENA upload tool", - "submissionDate": "", - 
"publicReleaseDate": "", - "ontologySourceReferences": [], - "filename": "Test Case ENA upload tool.txt", - "comments": [ - { - "name": "ISAjson export time", - "value": "2023-10-20T12:42:06Z" - }, - { - "name": "SEEK Project name", - "value": "Test Project ENA upload Tool" - }, - { - "name": "SEEK Project ID", - "value": "http://localhost:3000/single_pages/16" - }, - { - "name": "SEEK Investigation ID", - "value": "27" - } - ], - "publications": [], - "people": [ - { - "@id": "#people/4", - "lastName": "De Pelseneer", - "firstName": "Kevin", - "midInitials": "", - "email": "kevin.depelseneer@psb.ugent.be", - "phone": "", - "fax": "", - "address": "", - "affiliation": "", - "roles": [ - { - "termAccession": "", - "termSource": "", - "annotationValue": "" - } - ], - "comments": [ - { - "@id": "", - "value": "", - "name": "" - } - ] - } - ], - "studies": [ - { - "identifier": "", - "title": "Study - Test Case ENA", - "description": "", - "submissionDate": "", - "publicReleaseDate": "", - "filename": "Study - Test Case ENA.txt", - "comments": [ - { - "@id": "#study_comment/25_13_49", - "name": "ena_sample_alias_prefix", - "value": "sample_alias_" - }, - { - "@id": "#study_comment/25_0051dc80-5174-013c-6763-7a163e608de1", - "name": "SEEK Study ID", - "value": "25" - }, - { - "@id": "#study_comment/25_0051e690-5174-013c-6764-7a163e608de1", - "name": "SEEK creation date", - "value": "2023-10-20T11:12:23Z" - } - ], - "publications": [], - "people": [ - { - "@id": "#people/4", - "lastName": "De Pelseneer", - "firstName": "Kevin", - "midInitials": "", - "email": "kevin.depelseneer@psb.ugent.be", - "phone": "", - "fax": "", - "address": "", - "affiliation": "", - "roles": [ - { - "termAccession": "", - "termSource": "", - "annotationValue": "" - } - ], - "comments": [ - { - "@id": "", - "value": "", - "name": "" - } - ] - } - ], - "studyDesignDescriptors": [], - "characteristicCategories": [ - { - "@id": 
"#characteristic_category/geographic_location_(country_and/or_sea)_657", - "characteristicType": { - "annotationValue": "geographic location (country and/or sea)", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#characteristic_category/taxon_id_658", - "characteristicType": { - "annotationValue": "taxon_id", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#characteristic_category/title_662", - "characteristicType": { - "annotationValue": "title", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#characteristic_category/sample_description_663", - "characteristicType": { - "annotationValue": "sample_description", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#characteristic_category/collection_date_664", - "characteristicType": { - "annotationValue": "collection date", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#characteristic_category/accession_665", - "characteristicType": { - "annotationValue": "accession", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#characteristic_category/submission_date_666", - "characteristicType": { - "annotationValue": "submission date", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#characteristic_category/status_667", - "characteristicType": { - "annotationValue": "status", - "termAccession": "", - "termSource": "" - } - } - ], - "materials": { - "sources": [ - { - "@id": "#source/223", - "name": "Source 1", - "characteristics": [ - { - "category": { - "@id": "#characteristic_category/geographic_location_(country_and/or_sea)_657" - }, - "value": { - "annotationValue": "Argentina", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/taxon_id_658" - }, - "value": { - "annotationValue": "9606", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - 
"termAccession": "", - "comments": [] - } - } - ] - }, - { - "@id": "#source/226", - "name": "Source 2", - "characteristics": [ - { - "category": { - "@id": "#characteristic_category/geographic_location_(country_and/or_sea)_657" - }, - "value": { - "annotationValue": "Belgium", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/taxon_id_658" - }, - "value": { - "annotationValue": "646099", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - } - ], - "samples": [ - { - "@id": "#sample/224", - "name": "Sample 1", - "derivesFrom": [ - { - "@id": "#source/223" - } - ], - "characteristics": [ - { - "category": { - "@id": "#characteristic_category/title_662" - }, - "value": { - "annotationValue": "sample_title_1", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/sample_description_663" - }, - "value": { - "annotationValue": "sample_description_1", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/collection_date_664" - }, - "value": { - "annotationValue": "2020-10-11", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/accession_665" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/submission_date_666" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - 
"unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/status_667" - }, - "value": { - "annotationValue": "add", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "factorValues": [ - { - "category": { - "@id": "" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - }, - { - "@id": "#sample/225", - "name": "Sample 2", - "derivesFrom": [ - { - "@id": "#source/226" - } - ], - "characteristics": [ - { - "category": { - "@id": "#characteristic_category/title_662" - }, - "value": { - "annotationValue": "sample_title_2", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/sample_description_663" - }, - "value": { - "annotationValue": "sample_description_2", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/collection_date_664" - }, - "value": { - "annotationValue": "2008-01-24", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/accession_665" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/submission_date_666" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": 
"#characteristic_category/status_667" - }, - "value": { - "annotationValue": "add", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "factorValues": [ - { - "category": { - "@id": "" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ] - } - ] - }, - "protocols": [ - { - "@id": "#protocol/_25", - "name": "sample collection", - "protocolType": { - "annotationValue": "sample collection", - "termAccession": "", - "termSource": "" - }, - "description": "", - "uri": "", - "version": "", - "parameters": [], - "components": [ - { - "componentName": "", - "componentType": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - } - } - ] - }, - { - "@id": "#protocol/_48", - "name": "library construction", - "protocolType": { - "annotationValue": "library construction", - "termAccession": "", - "termSource": "" - }, - "description": "", - "uri": "", - "version": "", - "parameters": [ - { - "@id": "#parameter/672", - "parameterName": { - "annotationValue": "design_description", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#parameter/674", - "parameterName": { - "annotationValue": "library_source", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#parameter/675", - "parameterName": { - "annotationValue": "library_strategy", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#parameter/676", - "parameterName": { - "annotationValue": "library_selection", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#parameter/677", - "parameterName": { - "annotationValue": "library_layout", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#parameter/678", - "parameterName": { - "annotationValue": "insert_size", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#parameter/679", - 
"parameterName": { - "annotationValue": "platform", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#parameter/680", - "parameterName": { - "annotationValue": "instrument_model", - "termAccession": "", - "termSource": "" - } - } - ], - "components": [ - { - "componentName": "", - "componentType": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - } - } - ] - }, - { - "@id": "#protocol/_49", - "name": "nucleic acid sequencing", - "protocolType": { - "annotationValue": "nucleic acid sequencing", - "termAccession": "", - "termSource": "" - }, - "description": "", - "uri": "", - "version": "", - "parameters": [], - "components": [ - { - "componentName": "", - "componentType": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - } - } - ] - } - ], - "processSequence": [ - { - "@id": "#process/sample_collection/224", - "name": "", - "executesProtocol": { - "@id": "#protocol/_25" - }, - "parameterValues": [], - "performer": "", - "date": "", - "previousProcess": {}, - "nextProcess": {}, - "inputs": [ - { - "@id": "#source/223" - } - ], - "outputs": [ - { - "@id": "#sample/224" - } - ] - }, - { - "@id": "#process/sample_collection/225", - "name": "", - "executesProtocol": { - "@id": "#protocol/_25" - }, - "parameterValues": [], - "performer": "", - "date": "", - "previousProcess": {}, - "nextProcess": {}, - "inputs": [ - { - "@id": "#source/226" - } - ], - "outputs": [ - { - "@id": "#sample/225" - } - ] - } - ], - "assays": [ - { - "@id": "#assay/48_49", - "filename": "a_ena_stream_1.txt", - "measurementType": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "technologyType": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "comments": [ - { - "@id": "#assay_comment/25_48_49_14_50", - "name": "ena_study_title", - "value": "study_title_1" - }, - { - "@id": "#assay_comment/25_48_49_14_51", - "name": "study_type", - "value": "Transcriptome Analysis" - }, - { - "@id": 
"#assay_comment/25_48_49_14_52", - "name": "new_study_type", - "value": "This should not be submitted!" - }, - { - "@id": "#assay_comment/25_48_49_14_53", - "name": "ena_study_abstract", - "value": "study_abstract_1" - }, - { - "@id": "#assay_comment/25_48_49_14_54", - "name": "assay_stream", - "value": "Ena stream 1" - }, - { - "@id": "#assay_comment/25_48_49_14_55", - "name": "ena_study_alias_prefix", - "value": "study_alias_" - }, - { - "@id": "#assay_comment/25_48_49_14_56", - "name": "ena_experiment_alias_prefix", - "value": "experiment_alias_" - }, - { - "@id": "#assay_comment/25_48_49_14_57", - "name": "ena_run_alias_prefix", - "value": "run_alias_" - }, - { - "@id": "#assay_comment/48_49", - "name": "linked_assays", - "value": "[{\"id\":48,\"title\":\"Assay 1 - Test case ENA upload tool\"},{\"id\":49,\"title\":\"Assay 2 - Test case ENA upload tool\"}]" - } - ], - "technologyPlatform": "", - "characteristicCategories": [ - { - "@id": "#characteristic_category/title_671", - "characteristicType": { - "annotationValue": "title", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#characteristic_category/library_name_673", - "characteristicType": { - "annotationValue": "library_name", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#characteristic_category/accession_681", - "characteristicType": { - "annotationValue": "accession", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#characteristic_category/submission_date_682", - "characteristicType": { - "annotationValue": "submission date", - "termAccession": "", - "termSource": "" - } - }, - { - "@id": "#characteristic_category/status_683", - "characteristicType": { - "annotationValue": "status", - "termAccession": "", - "termSource": "" - } - } - ], - "materials": { - "samples": [], - "otherMaterials": [ - { - "@id": "#other_material/227", - "name": "library_construction_protocol_1", - "type": "library_construction_protocol", - "characteristics": [ - { - 
"category": { - "@id": "#characteristic_category/title_671" - }, - "value": { - "annotationValue": "experiment_title_1", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/library_name_673" - }, - "value": { - "annotationValue": "library_name_1", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/accession_681" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/submission_date_682" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/status_683" - }, - "value": { - "annotationValue": "add", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "derivesFrom": [ - { - "@id": "#sample/224" - } - ] - }, - { - "@id": "#other_material/228", - "name": "library_construction_protocol_2", - "type": "library_construction_protocol", - "characteristics": [ - { - "category": { - "@id": "#characteristic_category/title_671" - }, - "value": { - "annotationValue": "experiment_title_2", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/library_name_673" - }, - "value": { - "annotationValue": "library_name_2", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": 
"#characteristic_category/accession_681" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/submission_date_682" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/status_683" - }, - "value": { - "annotationValue": "add", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "derivesFrom": [ - { - "@id": "#sample/225" - } - ] - }, - { - "@id": "#other_material/229", - "name": "library_construction_protocol_2", - "type": "library_construction_protocol", - "characteristics": [ - { - "category": { - "@id": "#characteristic_category/title_671" - }, - "value": { - "annotationValue": "experiment_title_3", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/library_name_673" - }, - "value": { - "annotationValue": "library_name_2", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/accession_681" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/submission_date_682" - }, - "value": { - "annotationValue": "", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#characteristic_category/status_683" - }, - "value": { - 
"annotationValue": "add", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "derivesFrom": [ - { - "@id": "#sample/225" - } - ] - } - ] - }, - "processSequence": [ - { - "@id": "#process/library_construction/227", - "name": "", - "executesProtocol": { - "@id": "#protocol/_48" - }, - "parameterValues": [ - { - "category": { - "@id": "#parameter/672" - }, - "value": { - "annotationValue": "design_description_1", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/674" - }, - "value": { - "annotationValue": "GENOMIC", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/675" - }, - "value": { - "annotationValue": "WGA", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/676" - }, - "value": { - "annotationValue": "RANDOM", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/677" - }, - "value": { - "annotationValue": "SINGLE", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/678" - }, - "value": { - "annotationValue": "250", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/679" - }, - "value": { - "annotationValue": "LS454", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": 
"#parameter/680" - }, - "value": { - "annotationValue": "454 GS 20", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { - "@id": "#process/sample_collection/227" - }, - "nextProcess": { - "@id": "#process/nucleic_acid_sequencing/227" - }, - "inputs": [ - { - "@id": "#sample/224" - } - ], - "outputs": [ - { - "@id": "#other_material/227" - } - ] - }, - { - "@id": "#process/library_construction/228", - "name": "", - "executesProtocol": { - "@id": "#protocol/_48" - }, - "parameterValues": [ - { - "category": { - "@id": "#parameter/672" - }, - "value": { - "annotationValue": "design_description_2", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/674" - }, - "value": { - "annotationValue": "TRANSCRIPTOMIC", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/675" - }, - "value": { - "annotationValue": "RNA-Seq", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/676" - }, - "value": { - "annotationValue": "repeat fractionation", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/677" - }, - "value": { - "annotationValue": "SINGLE", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/678" - }, - "value": { - "annotationValue": "None", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - 
}, - { - "category": { - "@id": "#parameter/679" - }, - "value": { - "annotationValue": "Illumina", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/680" - }, - "value": { - "annotationValue": "Illumina Genome Analyzer", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { - "@id": "#process/sample_collection/228" - }, - "nextProcess": { - "@id": "#process/nucleic_acid_sequencing/228" - }, - "inputs": [ - { - "@id": "#sample/225" - } - ], - "outputs": [ - { - "@id": "#other_material/228" - } - ] - }, - { - "@id": "#process/library_construction/229", - "name": "", - "executesProtocol": { - "@id": "#protocol/_48" - }, - "parameterValues": [ - { - "category": { - "@id": "#parameter/672" - }, - "value": { - "annotationValue": "design_description_3", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/674" - }, - "value": { - "annotationValue": "TRANSCRIPTOMIC", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/675" - }, - "value": { - "annotationValue": "RNA-Seq", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/676" - }, - "value": { - "annotationValue": "repeat fractionation", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/677" - }, - "value": { - "annotationValue": "SINGLE", - "termSource": "", - "termAccession": "" - }, - "unit": { - 
"termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/678" - }, - "value": { - "annotationValue": "None", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/679" - }, - "value": { - "annotationValue": "Illumina", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - }, - { - "category": { - "@id": "#parameter/680" - }, - "value": { - "annotationValue": "Illumina Genome Analyzer", - "termSource": "", - "termAccession": "" - }, - "unit": { - "termSource": "", - "termAccession": "", - "comments": [] - } - } - ], - "performer": "", - "date": "", - "previousProcess": { - "@id": "#process/sample_collection/229" - }, - "nextProcess": { - "@id": "#process/nucleic_acid_sequencing/229" - }, - "inputs": [ - { - "@id": "#sample/225" - } - ], - "outputs": [ - { - "@id": "#other_material/229" - } - ] - }, - { - "@id": "#process/nucleic_acid_sequencing/233", - "name": "", - "executesProtocol": { - "@id": "#protocol/_49" - }, - "parameterValues": [], - "performer": "", - "date": "", - "previousProcess": { - "@id": "#process/library_construction/233" - }, - "nextProcess": {}, - "inputs": [ - { - "@id": "#other_material/227" - } - ], - "outputs": [ - { - "@id": "#data_file/233" - } - ] - }, - { - "@id": "#process/nucleic_acid_sequencing/234", - "name": "", - "executesProtocol": { - "@id": "#protocol/_49" - }, - "parameterValues": [], - "performer": "", - "date": "", - "previousProcess": { - "@id": "#process/library_construction/234" - }, - "nextProcess": {}, - "inputs": [ - { - "@id": "#other_material/228" - } - ], - "outputs": [ - { - "@id": "#data_file/234" - } - ] - }, - { - "@id": "#process/nucleic_acid_sequencing/235", - "name": "", - "executesProtocol": { - "@id": "#protocol/_49" - }, - "parameterValues": [], - "performer": "", - "date": 
"", - "previousProcess": { - "@id": "#process/library_construction/235" - }, - "nextProcess": {}, - "inputs": [ - { - "@id": "#other_material/229" - } - ], - "outputs": [ - { - "@id": "#data_file/235" - } - ] - } - ], - "dataFiles": [ - { - "@id": "#data_file/233", - "name": "RD file 1", - "type": "Raw Data File", - "comments": [ - { - "name": "file_name", - "value": "ENA_TEST2.R1.fastq.gz" - }, - { - "name": "file_type", - "value": "fastq" - }, - { - "name": "file checksum", - "value": "" - }, - { - "name": "accession", - "value": "" - }, - { - "name": "submission date", - "value": "" - }, - { - "name": "status", - "value": "add" - } - ] - }, - { - "@id": "#data_file/234", - "name": "RD file 2", - "type": "Raw Data File", - "comments": [ - { - "name": "file_name", - "value": "ENA_TEST2.R2.fastq.gz" - }, - { - "name": "file_type", - "value": "fastq" - }, - { - "name": "file checksum", - "value": "" - }, - { - "name": "accession", - "value": "" - }, - { - "name": "submission date", - "value": "" - }, - { - "name": "status", - "value": "add" - } - ] - }, - { - "@id": "#data_file/235", - "name": "RD file 3", - "type": "Raw Data File", - "comments": [ - { - "name": "file_name", - "value": "ENA_TEST1.R1.fastq.gz" - }, - { - "name": "file_type", - "value": "fastq" - }, - { - "name": "file checksum", - "value": "" - }, - { - "name": "accession", - "value": "" - }, - { - "name": "submission date", - "value": "" - }, - { - "name": "status", - "value": "add" - } - ] - } - ], - "unitCategories": [] - } - ], - "factors": [], - "unitCategories": [] - } - ] -} \ No newline at end of file diff --git a/tests/test_data/simple_test_case_v2.json b/tests/test_data/simple_test_case_v2.json new file mode 100644 index 0000000..2f6a1d4 --- /dev/null +++ b/tests/test_data/simple_test_case_v2.json @@ -0,0 +1,1631 @@ +{ + "identifier": "", + "title": "Test Case ENA upload tool", + "description": "This investigation matches the test case of the ENA upload tool", + "submissionDate": "", + 
"publicReleaseDate": "", + "ontologySourceReferences": [], + "filename": "Test Case ENA upload tool.txt", + "comments": [ + { + "name": "ISAjson export time", + "value": "2023-11-08T16:27:49Z" + }, + { + "name": "SEEK Project name", + "value": "Test Project ENA upload Tool" + }, + { + "name": "SEEK Project ID", + "value": "http://localhost:3000/single_pages/16" + }, + { + "name": "SEEK Investigation ID", + "value": "27" + } + ], + "publications": [], + "people": [ + { + "@id": "#people/4", + "lastName": "De Pelseneer", + "firstName": "Kevin", + "midInitials": "", + "email": "kevin.depelseneer@psb.ugent.be", + "phone": "", + "fax": "", + "address": "", + "affiliation": "", + "roles": [ + { + "termAccession": "", + "termSource": "", + "annotationValue": "" + } + ], + "comments": [ + { + "@id": "", + "value": "", + "name": "" + } + ] + } + ], + "studies": [ + { + "identifier": "", + "title": "Study - Test Case ENA", + "description": "", + "submissionDate": "", + "publicReleaseDate": "", + "filename": "Study - Test Case ENA.txt", + "comments": [ + { + "@id": "#study_comment/25_13_49", + "name": "ena_sample_alias_prefix", + "value": "sample_alias_" + }, + { + "@id": "#study_comment/25_ae7461a0-6081-013c-9ed3-7a163e608de1", + "name": "SEEK Study ID", + "value": "25" + }, + { + "@id": "#study_comment/25_ae7467e0-6081-013c-9ed4-7a163e608de1", + "name": "SEEK creation date", + "value": "2023-10-20T11:12:23Z" + } + ], + "publications": [], + "people": [ + { + "@id": "#people/4", + "lastName": "De Pelseneer", + "firstName": "Kevin", + "midInitials": "", + "email": "kevin.depelseneer@psb.ugent.be", + "phone": "", + "fax": "", + "address": "", + "affiliation": "", + "roles": [ + { + "termAccession": "", + "termSource": "", + "annotationValue": "" + } + ], + "comments": [ + { + "@id": "", + "value": "", + "name": "" + } + ] + } + ], + "studyDesignDescriptors": [], + "characteristicCategories": [ + { + "@id": 
"#characteristic_category/geographic_location_(country_and/or_sea)_657", + "characteristicType": { + "annotationValue": "geographic location (country and/or sea)", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/taxon_id_658", + "characteristicType": { + "annotationValue": "taxon_id", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/title_662", + "characteristicType": { + "annotationValue": "title", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/sample_description_663", + "characteristicType": { + "annotationValue": "sample_description", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/collection_date_664", + "characteristicType": { + "annotationValue": "collection date", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/accession_665", + "characteristicType": { + "annotationValue": "accession", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/submission_date_666", + "characteristicType": { + "annotationValue": "submission date", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/status_667", + "characteristicType": { + "annotationValue": "status", + "termAccession": "", + "termSource": "" + } + } + ], + "materials": { + "sources": [ + { + "@id": "#source/223", + "name": "Source 1", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/geographic_location_(country_and/or_sea)_657" + }, + "value": { + "annotationValue": "Argentina", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/taxon_id_658" + }, + "value": { + "annotationValue": "9606", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + 
"termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#source/226", + "name": "Source 2", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/geographic_location_(country_and/or_sea)_657" + }, + "value": { + "annotationValue": "Belgium", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/taxon_id_658" + }, + "value": { + "annotationValue": "646099", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + } + ], + "samples": [ + { + "@id": "#sample/224", + "name": "Sample 1", + "derivesFrom": [ + { + "@id": "#source/223" + } + ], + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_662" + }, + "value": { + "annotationValue": "sample_title_1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/sample_description_663" + }, + "value": { + "annotationValue": "sample_description_1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/collection_date_664" + }, + "value": { + "annotationValue": "2020-10-11", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_665" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_666" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + 
"unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_667" + }, + "value": { + "annotationValue": "add", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "factorValues": [ + { + "category": { + "@id": "" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/225", + "name": "Sample 2", + "derivesFrom": [ + { + "@id": "#source/226" + } + ], + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_662" + }, + "value": { + "annotationValue": "sample_title_2", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/sample_description_663" + }, + "value": { + "annotationValue": "sample_description_2", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/collection_date_664" + }, + "value": { + "annotationValue": "2008-01-24", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_665" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_666" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": 
"#characteristic_category/status_667" + }, + "value": { + "annotationValue": "add", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "factorValues": [ + { + "category": { + "@id": "" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + } + ] + }, + "protocols": [ + { + "@id": "#protocol/_25", + "name": "sample collection", + "protocolType": { + "annotationValue": "sample collection", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + }, + { + "@id": "#protocol/_48", + "name": "library construction", + "protocolType": { + "annotationValue": "library construction", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [ + { + "@id": "#parameter/672", + "parameterName": { + "annotationValue": "design_description", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/674", + "parameterName": { + "annotationValue": "library_source", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/675", + "parameterName": { + "annotationValue": "library_strategy", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/676", + "parameterName": { + "annotationValue": "library_selection", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/677", + "parameterName": { + "annotationValue": "library_layout", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/678", + "parameterName": { + "annotationValue": "insert_size", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/679", + 
"parameterName": { + "annotationValue": "platform", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/680", + "parameterName": { + "annotationValue": "instrument_model", + "termAccession": "", + "termSource": "" + } + } + ], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + }, + { + "@id": "#protocol/_49", + "name": "nucleic acid sequencing", + "protocolType": { + "annotationValue": "nucleic acid sequencing", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + } + ], + "processSequence": [ + { + "@id": "#process/sample_collection/224", + "name": "", + "executesProtocol": { + "@id": "#protocol/_25" + }, + "parameterValues": [], + "performer": "", + "date": "", + "inputs": [ + { + "@id": "#source/223" + } + ], + "outputs": [ + { + "@id": "#sample/224" + } + ] + }, + { + "@id": "#process/sample_collection/225", + "name": "", + "executesProtocol": { + "@id": "#protocol/_25" + }, + "parameterValues": [], + "performer": "", + "date": "", + "inputs": [ + { + "@id": "#source/226" + } + ], + "outputs": [ + { + "@id": "#sample/225" + } + ] + } + ], + "assays": [ + { + "@id": "#assay/48_49", + "filename": "a_ena_stream_1.txt", + "measurementType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "technologyType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "comments": [ + { + "@id": "#assay_comment/25_48_49_14_50", + "name": "ena_study_title", + "value": "study_title_1" + }, + { + "@id": "#assay_comment/25_48_49_14_51", + "name": "study_type", + "value": "Transcriptome Analysis" + }, + { + "@id": "#assay_comment/25_48_49_14_52", + "name": "new_study_type", + "value": "This should not be 
submitted!" + }, + { + "@id": "#assay_comment/25_48_49_14_53", + "name": "ena_study_abstract", + "value": "study_abstract_1" + }, + { + "@id": "#assay_comment/25_48_49_14_54", + "name": "assay_stream", + "value": "Ena stream 1" + }, + { + "@id": "#assay_comment/25_48_49_14_55", + "name": "ena_study_alias_prefix", + "value": "study_alias_" + }, + { + "@id": "#assay_comment/25_48_49_14_56", + "name": "ena_experiment_alias_prefix", + "value": "experiment_alias_" + }, + { + "@id": "#assay_comment/25_48_49_14_57", + "name": "ena_run_alias_prefix", + "value": "run_alias_" + }, + { + "@id": "#assay_comment/48_49", + "name": "linked_assays", + "value": "[{\"id\":48,\"title\":\"Assay 1 - Test case ENA upload tool\"},{\"id\":49,\"title\":\"Assay 2 - Test case ENA upload tool\"}]" + } + ], + "technologyPlatform": "", + "characteristicCategories": [ + { + "@id": "#characteristic_category/title_671", + "characteristicType": { + "annotationValue": "title", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/library_name_673", + "characteristicType": { + "annotationValue": "library_name", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/accession_681", + "characteristicType": { + "annotationValue": "accession", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/submission_date_682", + "characteristicType": { + "annotationValue": "submission date", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/status_683", + "characteristicType": { + "annotationValue": "status", + "termAccession": "", + "termSource": "" + } + } + ], + "materials": { + "samples": [], + "otherMaterials": [ + { + "@id": "#other_material/227", + "name": "library_construction_protocol_1", + "type": "library_construction_protocol", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_671" + }, + "value": { + "annotationValue": 
"experiment_title_1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/library_name_673" + }, + "value": { + "annotationValue": "library_name_1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_681" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_682" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_683" + }, + "value": { + "annotationValue": "add", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/224" + } + ] + }, + { + "@id": "#other_material/228", + "name": "library_construction_protocol_2", + "type": "library_construction_protocol", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_671" + }, + "value": { + "annotationValue": "experiment_title_2", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/library_name_673" + }, + "value": { + "annotationValue": "library_name_2", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_681" + }, + "value": { + "annotationValue": "", + "termSource": "", + 
"termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_682" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_683" + }, + "value": { + "annotationValue": "add", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/225" + } + ] + }, + { + "@id": "#other_material/229", + "name": "library_construction_protocol_2", + "type": "library_construction_protocol", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_671" + }, + "value": { + "annotationValue": "experiment_title_3", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/library_name_673" + }, + "value": { + "annotationValue": "library_name_2", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_681" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_682" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_683" + }, + "value": { + "annotationValue": "add", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + 
"termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/225" + } + ] + } + ] + }, + "processSequence": [ + { + "@id": "#process/library_construction/227", + "name": "", + "executesProtocol": { + "@id": "#protocol/_48" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/672" + }, + "value": { + "annotationValue": "design_description_1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/674" + }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/675" + }, + "value": { + "annotationValue": "WGA", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/676" + }, + "value": { + "annotationValue": "RANDOM", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/677" + }, + "value": { + "annotationValue": "SINGLE", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/678" + }, + "value": { + "annotationValue": "250", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/679" + }, + "value": { + "annotationValue": "LS454", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/680" + }, + "value": { + "annotationValue": "454 GS 20", + "termSource": "", + "termAccession": "" + }, + 
"unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "inputs": [ + { + "@id": "#sample/224" + } + ], + "outputs": [ + { + "@id": "#other_material/227" + } + ], + "previousProcess": { + "@id": "#process/sample_collection/227" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/227" + } + }, + { + "@id": "#process/library_construction/228", + "name": "", + "executesProtocol": { + "@id": "#protocol/_48" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/672" + }, + "value": { + "annotationValue": "design_description_2", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/674" + }, + "value": { + "annotationValue": "TRANSCRIPTOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/675" + }, + "value": { + "annotationValue": "RNA-Seq", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/676" + }, + "value": { + "annotationValue": "repeat fractionation", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/677" + }, + "value": { + "annotationValue": "SINGLE", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/678" + }, + "value": { + "annotationValue": "None", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/679" + }, + "value": { + "annotationValue": "Illumina", + "termSource": 
"", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/680" + }, + "value": { + "annotationValue": "Illumina Genome Analyzer", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "inputs": [ + { + "@id": "#sample/225" + } + ], + "outputs": [ + { + "@id": "#other_material/228" + } + ], + "previousProcess": { + "@id": "#process/sample_collection/228" + }, + "nextProcess": {} + }, + { + "@id": "#process/library_construction/229", + "name": "", + "executesProtocol": { + "@id": "#protocol/_48" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/672" + }, + "value": { + "annotationValue": "design_description_3", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/674" + }, + "value": { + "annotationValue": "TRANSCRIPTOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/675" + }, + "value": { + "annotationValue": "RNA-Seq", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/676" + }, + "value": { + "annotationValue": "repeat fractionation", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/677" + }, + "value": { + "annotationValue": "SINGLE", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/678" + }, + "value": { + "annotationValue": "None", + "termSource": "", 
+ "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/679" + }, + "value": { + "annotationValue": "Illumina", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/680" + }, + "value": { + "annotationValue": "Illumina Genome Analyzer", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "inputs": [ + { + "@id": "#sample/225" + } + ], + "outputs": [ + { + "@id": "#other_material/229" + } + ], + "previousProcess": { + "@id": "#process/sample_collection/229" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/229" + } + }, + { + "@id": "#process/nucleic_acid_sequencing/233_234", + "name": "", + "executesProtocol": { + "@id": "#protocol/_49" + }, + "parameterValues": [], + "performer": "", + "date": "", + "inputs": [ + { + "@id": "#other_material/227" + } + ], + "outputs": [ + { + "@id": "#data_file/233" + }, + { + "@id": "#data_file/234" + } + ], + "previousProcess": { + "@id": "#process/library_construction/233" + }, + "nextProcess": {} + }, + { + "@id": "#process/nucleic_acid_sequencing/235", + "name": "", + "executesProtocol": { + "@id": "#protocol/_49" + }, + "parameterValues": [], + "performer": "", + "date": "", + "inputs": [ + { + "@id": "#other_material/229" + } + ], + "outputs": [ + { + "@id": "#data_file/235" + } + ], + "previousProcess": { + "@id": "#process/library_construction/235" + }, + "nextProcess": {} + } + ], + "dataFiles": [ + { + "@id": "#data_file/233", + "name": "RD file 1a", + "type": "Raw Data File", + "comments": [ + { + "name": "file_name", + "value": "ENA_TEST2.R1.fastq.gz" + }, + { + "name": "file_type", + "value": "fastq" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "accession", + 
"value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "add" + } + ] + }, + { + "@id": "#data_file/234", + "name": "RD file 1b", + "type": "Raw Data File", + "comments": [ + { + "name": "file_name", + "value": "ENA_TEST2.R2.fastq.gz" + }, + { + "name": "file_type", + "value": "fastq" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "accession", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "add" + } + ] + }, + { + "@id": "#data_file/235", + "name": "RD file 3", + "type": "Raw Data File", + "comments": [ + { + "name": "file_name", + "value": "ENA_TEST1.R1.fastq.gz" + }, + { + "name": "file_type", + "value": "fastq" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "accession", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "add" + } + ] + } + ], + "unitCategories": [] + } + ], + "factors": [], + "unitCategories": [] + } + ] +} \ No newline at end of file From 5883f7ca8aa7ccd0cb2ca05ec235e655d9ca1cec Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Wed, 8 Nov 2023 18:42:43 +0100 Subject: [PATCH 47/62] Fixed typo in example data --- example_read_isa_json.ipynb | 14 +++++++------- tests/test_data/simple_test_case_v2.json | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/example_read_isa_json.ipynb b/example_read_isa_json.ipynb index 0ffbaed..5da83f6 100644 --- a/example_read_isa_json.ipynb +++ b/example_read_isa_json.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -35,7 +35,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -56,7 +56,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -80,7 +80,7 @@ 
}, { "cell_type": "code", - "execution_count": 14, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -99,7 +99,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -309,7 +309,7 @@ " GENOMIC\n", " WGA\n", " RANDOM\n", - " SINGLE\n", + " PAIRED\n", " 250\n", " LS454\n", " 454 GS 20\n", @@ -368,7 +368,7 @@ "2 experiment_title_3 add design_description_3 \n", "\n", " library_source library_strategy library_selection library_layout \\\n", - "0 GENOMIC WGA RANDOM SINGLE \n", + "0 GENOMIC WGA RANDOM PAIRED \n", "1 TRANSCRIPTOMIC RNA-Seq repeat fractionation SINGLE \n", "2 TRANSCRIPTOMIC RNA-Seq repeat fractionation SINGLE \n", "\n", diff --git a/tests/test_data/simple_test_case_v2.json b/tests/test_data/simple_test_case_v2.json index 2f6a1d4..2a1e878 100644 --- a/tests/test_data/simple_test_case_v2.json +++ b/tests/test_data/simple_test_case_v2.json @@ -1107,7 +1107,7 @@ "@id": "#parameter/677" }, "value": { - "annotationValue": "SINGLE", + "annotationValue": "PAIRED", "termSource": "", "termAccession": "" }, From 46b9afe6ffc15f0f117216a02a81a7a9aee8d000 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Wed, 8 Nov 2023 20:25:44 +0100 Subject: [PATCH 48/62] Adapt `get_parameter_values` for multi-output process --- ena_upload/json_parsing/ena_experiment.py | 13 +- ena_upload/json_parsing/ena_sample.py | 20 +- ena_upload/json_parsing/other_material.py | 1 - ..._multi_assay_stream_investigation_v2.json} | 383 ++++++++---------- 4 files changed, 183 insertions(+), 234 deletions(-) rename tests/test_data/{multi_study_multi_assay_stream_investigation.json => multi_study_multi_assay_stream_investigation_v2.json} (97%) diff --git a/ena_upload/json_parsing/ena_experiment.py b/ena_upload/json_parsing/ena_experiment.py index c177213..d5695b6 100644 --- a/ena_upload/json_parsing/ena_experiment.py +++ b/ena_upload/json_parsing/ena_experiment.py @@ -150,15 +150,16 @@ def get_parameter_values( """ 
param_vals = [] parameters = fetch_parameters(study_protocols_dict) - for ps in assay_stream["processSequence"]: - sample_id = clip_off_prefix(ps["@id"]) + for process in assay_stream["processSequence"]: + sample_ids = [clip_off_prefix(output["@id"]) for output in process["outputs"]] parameter_values = [ ParameterValue.from_dict(parameter_value, parameters) - for parameter_value in ps["parameterValues"] + for parameter_value in process["parameterValues"] ] - param_vals.append( - {"sample_id": sample_id, "parameter_values": parameter_values} - ) + for sample_id in sample_ids: + param_vals.append( + {"sample_id": sample_id, "parameter_values": parameter_values} + ) return param_vals diff --git a/ena_upload/json_parsing/ena_sample.py b/ena_upload/json_parsing/ena_sample.py index b68beba..ad071f5 100644 --- a/ena_upload/json_parsing/ena_sample.py +++ b/ena_upload/json_parsing/ena_sample.py @@ -1,9 +1,12 @@ from typing import List, Dict -from ena_upload.json_parsing.characteristic import SampleCharacteristic +from ena_upload.json_parsing.characteristic import SampleCharacteristic, ParameterValue from pandas import DataFrame -from ena_upload.json_parsing.ena_std_lib import clip_off_prefix, fetch_study_comment_by_name +from ena_upload.json_parsing.ena_std_lib import ( + clip_off_prefix, + fetch_study_comment_by_name, +) def fetch_characteristic_categories(study_dict: Dict) -> Dict: @@ -92,14 +95,21 @@ class EnaSample: prefix: str = "ena_sample_alias_prefix" - def __init__(self, characteristics: List[SampleCharacteristic], alias: str) -> None: + def __init__( + self, + characteristics: List[SampleCharacteristic], + parameter_values: List[ParameterValue], + alias: str, + ) -> None: self.alias = alias self.characteristics = characteristics + self.parameter_values = parameter_values def to_dict(self) -> Dict: return { "alias": self.alias, "characteristics": [char.to_dict() for char in self.characteristics], + "parameter_values": [pv.to_dict() for pv in self.parameter_values], 
} @classmethod @@ -142,6 +152,7 @@ def from_study_dict(self, study_dict: Dict) -> None: EnaSample( alias=sample_alias(sd["id"], study_alias_prefix), characteristics=sd["characteristics"], + parameter_values=[], # TODO Add functionality for parameter values in samples ) for sd in samples_data ] @@ -162,6 +173,9 @@ def export_samples_to_dataframe(samples: List[EnaSample]): characteristics = sample_dict.pop("characteristics") for char in characteristics: sample_dict.update({char["category"]["name"]: char["value"]}) + parameter_values = sample_dict.pop("parameter_values") + for pv in parameter_values: + sample_dict.update({pv["category"]["name"]: pv["value"]}) flat_dicts.append(sample_dict) return DataFrame.from_dict(flat_dicts) diff --git a/ena_upload/json_parsing/other_material.py b/ena_upload/json_parsing/other_material.py index 9c8ba31..e117285 100644 --- a/ena_upload/json_parsing/other_material.py +++ b/ena_upload/json_parsing/other_material.py @@ -1,6 +1,5 @@ from ena_upload.json_parsing.characteristic import ( IsaBase, - ParameterValue, OtherMaterialCharacteristic, ) diff --git a/tests/test_data/multi_study_multi_assay_stream_investigation.json b/tests/test_data/multi_study_multi_assay_stream_investigation_v2.json similarity index 97% rename from tests/test_data/multi_study_multi_assay_stream_investigation.json rename to tests/test_data/multi_study_multi_assay_stream_investigation_v2.json index 5f47bb0..789bda2 100644 --- a/tests/test_data/multi_study_multi_assay_stream_investigation.json +++ b/tests/test_data/multi_study_multi_assay_stream_investigation_v2.json @@ -1,15 +1,15 @@ { "identifier": "", - "title": "Test Investigation original", - "description": "This investigation uses the Standard ENA templates.", + "title": "Multi study - multi assay stream Investigation", + "description": "Investigation with multiple studies and assay streams.", "submissionDate": "", "publicReleaseDate": "", "ontologySourceReferences": [], - "filename": "Test Investigation 
original.txt", + "filename": "Multi study - multi assay stream Investigation.txt", "comments": [ { "name": "ISAjson export time", - "value": "2023-10-13T06:24:54Z" + "value": "2023-11-08T18:42:37Z" }, { "name": "SEEK Project name", @@ -28,10 +28,10 @@ "people": [ { "@id": "#people/4", - "lastName": "De Pelseneer", + "lastName": "DP", "firstName": "Kevin", "midInitials": "", - "email": "kevin.depelseneer@psb.ugent.be", + "email": "kevin.dp@mail.com", "phone": "", "fax": "", "address": "", @@ -67,12 +67,12 @@ "value": "https://datahub.elixir-belgium.org/samples/" }, { - "@id": "#study_comment/19_255b7d70-4bbf-013c-6732-7a163e608de1", + "@id": "#study_comment/19_830dd160-6094-013c-9ed7-7a163e608de1", "name": "SEEK Study ID", "value": "19" }, { - "@id": "#study_comment/19_255b8560-4bbf-013c-6733-7a163e608de1", + "@id": "#study_comment/19_830dd890-6094-013c-9ed8-7a163e608de1", "name": "SEEK creation date", "value": "2023-10-09T13:39:46Z" } @@ -128,10 +128,10 @@ "people": [ { "@id": "#people/4", - "lastName": "De Pelseneer", + "lastName": "DP", "firstName": "Kevin", "midInitials": "", - "email": "kevin.depelseneer@psb.ugent.be", + "email": "kevin.dp@mail.com", "phone": "", "fax": "", "address": "", @@ -1152,7 +1152,7 @@ ], "processSequence": [ { - "@id": "#process/sample_collection/142", + "@id": "#process/sample_collection/142_143", "name": "", "executesProtocol": { "@id": "#protocol/3_19" @@ -1160,8 +1160,6 @@ "parameterValues": [], "performer": "", "date": "", - "previousProcess": {}, - "nextProcess": {}, "inputs": [ { "@id": "#source/140" @@ -1170,33 +1168,14 @@ "outputs": [ { "@id": "#sample/142" - } - ] - }, - { - "@id": "#process/sample_collection/143", - "name": "", - "executesProtocol": { - "@id": "#protocol/3_19" - }, - "parameterValues": [], - "performer": "", - "date": "", - "previousProcess": {}, - "nextProcess": {}, - "inputs": [ - { - "@id": "#source/140" - } - ], - "outputs": [ + }, { "@id": "#sample/143" } ] }, { - "@id": 
"#process/sample_collection/144", + "@id": "#process/sample_collection/144_145_196", "name": "", "executesProtocol": { "@id": "#protocol/3_19" @@ -1204,8 +1183,6 @@ "parameterValues": [], "performer": "", "date": "", - "previousProcess": {}, - "nextProcess": {}, "inputs": [ { "@id": "#source/141" @@ -1214,48 +1191,10 @@ "outputs": [ { "@id": "#sample/144" - } - ] - }, - { - "@id": "#process/sample_collection/145", - "name": "", - "executesProtocol": { - "@id": "#protocol/3_19" - }, - "parameterValues": [], - "performer": "", - "date": "", - "previousProcess": {}, - "nextProcess": {}, - "inputs": [ - { - "@id": "#source/141" - } - ], - "outputs": [ + }, { "@id": "#sample/145" - } - ] - }, - { - "@id": "#process/sample_collection/196", - "name": "", - "executesProtocol": { - "@id": "#protocol/3_19" - }, - "parameterValues": [], - "performer": "", - "date": "", - "previousProcess": {}, - "nextProcess": {}, - "inputs": [ - { - "@id": "#source/141" - } - ], - "outputs": [ + }, { "@id": "#sample/196" } @@ -1797,12 +1736,6 @@ ], "performer": "", "date": "", - "previousProcess": { - "@id": "#process/sample_collection/146" - }, - "nextProcess": { - "@id": "#process/nucleic_acid_sequencing/146" - }, "inputs": [ { "@id": "#sample/142" @@ -1810,9 +1743,15 @@ ], "outputs": [ { - "@id": "#sample/146" + "@id": "#other_material/146" } - ] + ], + "previousProcess": { + "@id": "#process/sample_collection/146" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/146" + } }, { "@id": "#process/library_construction/147", @@ -1959,12 +1898,6 @@ ], "performer": "", "date": "", - "previousProcess": { - "@id": "#process/sample_collection/147" - }, - "nextProcess": { - "@id": "#process/nucleic_acid_sequencing/147" - }, "inputs": [ { "@id": "#sample/143" @@ -1972,9 +1905,15 @@ ], "outputs": [ { - "@id": "#sample/147" + "@id": "#other_material/147" } - ] + ], + "previousProcess": { + "@id": "#process/sample_collection/147" + }, + "nextProcess": { + "@id": 
"#process/nucleic_acid_sequencing/147" + } }, { "@id": "#process/library_construction/148", @@ -2121,12 +2060,6 @@ ], "performer": "", "date": "", - "previousProcess": { - "@id": "#process/sample_collection/148" - }, - "nextProcess": { - "@id": "#process/nucleic_acid_sequencing/148" - }, "inputs": [ { "@id": "#sample/144" @@ -2134,9 +2067,15 @@ ], "outputs": [ { - "@id": "#sample/148" + "@id": "#other_material/148" } - ] + ], + "previousProcess": { + "@id": "#process/sample_collection/148" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/148" + } }, { "@id": "#process/library_construction/149", @@ -2283,12 +2222,6 @@ ], "performer": "", "date": "", - "previousProcess": { - "@id": "#process/sample_collection/149" - }, - "nextProcess": { - "@id": "#process/nucleic_acid_sequencing/149" - }, "inputs": [ { "@id": "#sample/145" @@ -2296,9 +2229,15 @@ ], "outputs": [ { - "@id": "#sample/149" + "@id": "#other_material/149" } - ] + ], + "previousProcess": { + "@id": "#process/sample_collection/149" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/149" + } }, { "@id": "#process/nucleic_acid_sequencing/150", @@ -2309,10 +2248,6 @@ "parameterValues": [], "performer": "", "date": "", - "previousProcess": { - "@id": "#process/library_construction/150" - }, - "nextProcess": {}, "inputs": [ { "@id": "#other_material/146" @@ -2320,9 +2255,13 @@ ], "outputs": [ { - "@id": "#sample/150" + "@id": "#data_file/150" } - ] + ], + "previousProcess": { + "@id": "#process/library_construction/150" + }, + "nextProcess": {} }, { "@id": "#process/nucleic_acid_sequencing/151", @@ -2333,10 +2272,6 @@ "parameterValues": [], "performer": "", "date": "", - "previousProcess": { - "@id": "#process/library_construction/151" - }, - "nextProcess": {}, "inputs": [ { "@id": "#other_material/147" @@ -2344,9 +2279,13 @@ ], "outputs": [ { - "@id": "#sample/151" + "@id": "#data_file/151" } - ] + ], + "previousProcess": { + "@id": "#process/library_construction/151" + }, + 
"nextProcess": {} }, { "@id": "#process/nucleic_acid_sequencing/152", @@ -2357,10 +2296,6 @@ "parameterValues": [], "performer": "", "date": "", - "previousProcess": { - "@id": "#process/library_construction/152" - }, - "nextProcess": {}, "inputs": [ { "@id": "#other_material/148" @@ -2368,9 +2303,13 @@ ], "outputs": [ { - "@id": "#sample/152" + "@id": "#data_file/152" } - ] + ], + "previousProcess": { + "@id": "#process/library_construction/152" + }, + "nextProcess": {} }, { "@id": "#process/nucleic_acid_sequencing/153", @@ -2381,10 +2320,6 @@ "parameterValues": [], "performer": "", "date": "", - "previousProcess": { - "@id": "#process/library_construction/153" - }, - "nextProcess": {}, "inputs": [ { "@id": "#other_material/149" @@ -2392,9 +2327,13 @@ ], "outputs": [ { - "@id": "#sample/153" + "@id": "#data_file/153" } - ] + ], + "previousProcess": { + "@id": "#process/library_construction/153" + }, + "nextProcess": {} } ], "dataFiles": [ @@ -3059,12 +2998,6 @@ ], "performer": "", "date": "", - "previousProcess": { - "@id": "#process/sample_collection/154" - }, - "nextProcess": { - "@id": "#process/nucleic_acid_sequencing/154" - }, "inputs": [ { "@id": "#sample/142" @@ -3072,9 +3005,15 @@ ], "outputs": [ { - "@id": "#sample/154" + "@id": "#other_material/154" } - ] + ], + "previousProcess": { + "@id": "#process/sample_collection/154" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/154" + } }, { "@id": "#process/library_construction/155", @@ -3221,12 +3160,6 @@ ], "performer": "", "date": "", - "previousProcess": { - "@id": "#process/sample_collection/155" - }, - "nextProcess": { - "@id": "#process/nucleic_acid_sequencing/155" - }, "inputs": [ { "@id": "#sample/143" @@ -3234,9 +3167,15 @@ ], "outputs": [ { - "@id": "#sample/155" + "@id": "#other_material/155" } - ] + ], + "previousProcess": { + "@id": "#process/sample_collection/155" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/155" + } }, { "@id": 
"#process/library_construction/156", @@ -3383,12 +3322,6 @@ ], "performer": "", "date": "", - "previousProcess": { - "@id": "#process/sample_collection/156" - }, - "nextProcess": { - "@id": "#process/nucleic_acid_sequencing/156" - }, "inputs": [ { "@id": "#sample/144" @@ -3396,9 +3329,15 @@ ], "outputs": [ { - "@id": "#sample/156" + "@id": "#other_material/156" } - ] + ], + "previousProcess": { + "@id": "#process/sample_collection/156" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/156" + } }, { "@id": "#process/library_construction/157", @@ -3545,12 +3484,6 @@ ], "performer": "", "date": "", - "previousProcess": { - "@id": "#process/sample_collection/157" - }, - "nextProcess": { - "@id": "#process/nucleic_acid_sequencing/157" - }, "inputs": [ { "@id": "#sample/145" @@ -3558,9 +3491,15 @@ ], "outputs": [ { - "@id": "#sample/157" + "@id": "#other_material/157" } - ] + ], + "previousProcess": { + "@id": "#process/sample_collection/157" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/157" + } }, { "@id": "#process/nucleic_acid_sequencing/158", @@ -3571,10 +3510,6 @@ "parameterValues": [], "performer": "", "date": "", - "previousProcess": { - "@id": "#process/library_construction/158" - }, - "nextProcess": {}, "inputs": [ { "@id": "#other_material/154" @@ -3582,9 +3517,13 @@ ], "outputs": [ { - "@id": "#sample/158" + "@id": "#data_file/158" } - ] + ], + "previousProcess": { + "@id": "#process/library_construction/158" + }, + "nextProcess": {} }, { "@id": "#process/nucleic_acid_sequencing/159", @@ -3595,10 +3534,6 @@ "parameterValues": [], "performer": "", "date": "", - "previousProcess": { - "@id": "#process/library_construction/159" - }, - "nextProcess": {}, "inputs": [ { "@id": "#other_material/155" @@ -3606,9 +3541,13 @@ ], "outputs": [ { - "@id": "#sample/159" + "@id": "#data_file/159" } - ] + ], + "previousProcess": { + "@id": "#process/library_construction/159" + }, + "nextProcess": {} }, { "@id": 
"#process/nucleic_acid_sequencing/160", @@ -3619,10 +3558,6 @@ "parameterValues": [], "performer": "", "date": "", - "previousProcess": { - "@id": "#process/library_construction/160" - }, - "nextProcess": {}, "inputs": [ { "@id": "#other_material/156" @@ -3630,9 +3565,13 @@ ], "outputs": [ { - "@id": "#sample/160" + "@id": "#data_file/160" } - ] + ], + "previousProcess": { + "@id": "#process/library_construction/160" + }, + "nextProcess": {} }, { "@id": "#process/nucleic_acid_sequencing/161", @@ -3643,10 +3582,6 @@ "parameterValues": [], "performer": "", "date": "", - "previousProcess": { - "@id": "#process/library_construction/161" - }, - "nextProcess": {}, "inputs": [ { "@id": "#other_material/157" @@ -3654,9 +3589,13 @@ ], "outputs": [ { - "@id": "#sample/161" + "@id": "#data_file/161" } - ] + ], + "previousProcess": { + "@id": "#process/library_construction/161" + }, + "nextProcess": {} } ], "dataFiles": [ @@ -3805,12 +3744,12 @@ "value": "https://datahub.elixir-belgium.org/samples/" }, { - "@id": "#study_comment/20_259d4380-4bbf-013c-6734-7a163e608de1", + "@id": "#study_comment/20_836d8d10-6094-013c-9ed9-7a163e608de1", "name": "SEEK Study ID", "value": "20" }, { - "@id": "#study_comment/20_259d4a20-4bbf-013c-6735-7a163e608de1", + "@id": "#study_comment/20_836d93c0-6094-013c-9eda-7a163e608de1", "name": "SEEK creation date", "value": "2023-10-09T14:34:40Z" } @@ -3843,10 +3782,10 @@ "people": [ { "@id": "#people/4", - "lastName": "De Pelseneer", + "lastName": "DP", "firstName": "Kevin", "midInitials": "", - "email": "kevin.depelseneer@psb.ugent.be", + "email": "kevin.dp@mail.com", "phone": "", "fax": "", "address": "", @@ -4402,8 +4341,6 @@ "parameterValues": [], "performer": "", "date": "", - "previousProcess": {}, - "nextProcess": {}, "inputs": [ { "@id": "#source/162" @@ -4424,8 +4361,6 @@ "parameterValues": [], "performer": "", "date": "", - "previousProcess": {}, - "nextProcess": {}, "inputs": [ { "@id": "#source/163" @@ -4829,12 +4764,6 @@ ], "performer": 
"", "date": "", - "previousProcess": { - "@id": "#process/sample_collection/166" - }, - "nextProcess": { - "@id": "#process/nucleic_acid_sequencing/166" - }, "inputs": [ { "@id": "#sample/164" @@ -4842,9 +4771,15 @@ ], "outputs": [ { - "@id": "#sample/166" + "@id": "#other_material/166" } - ] + ], + "previousProcess": { + "@id": "#process/sample_collection/166" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/166" + } }, { "@id": "#process/library_construction/167", @@ -4991,12 +4926,6 @@ ], "performer": "", "date": "", - "previousProcess": { - "@id": "#process/sample_collection/167" - }, - "nextProcess": { - "@id": "#process/nucleic_acid_sequencing/167" - }, "inputs": [ { "@id": "#sample/165" @@ -5004,9 +4933,15 @@ ], "outputs": [ { - "@id": "#sample/167" + "@id": "#other_material/167" } - ] + ], + "previousProcess": { + "@id": "#process/sample_collection/167" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/167" + } }, { "@id": "#process/nucleic_acid_sequencing/168", @@ -5017,10 +4952,6 @@ "parameterValues": [], "performer": "", "date": "", - "previousProcess": { - "@id": "#process/library_construction/168" - }, - "nextProcess": {}, "inputs": [ { "@id": "#other_material/166" @@ -5028,9 +4959,13 @@ ], "outputs": [ { - "@id": "#sample/168" + "@id": "#data_file/168" } - ] + ], + "previousProcess": { + "@id": "#process/library_construction/168" + }, + "nextProcess": {} }, { "@id": "#process/nucleic_acid_sequencing/169", @@ -5041,10 +4976,6 @@ "parameterValues": [], "performer": "", "date": "", - "previousProcess": { - "@id": "#process/library_construction/169" - }, - "nextProcess": {}, "inputs": [ { "@id": "#other_material/167" @@ -5052,9 +4983,13 @@ ], "outputs": [ { - "@id": "#sample/169" + "@id": "#data_file/169" } - ] + ], + "previousProcess": { + "@id": "#process/library_construction/169" + }, + "nextProcess": {} } ], "dataFiles": [ From e9b913d555e86179678da9f2a7391c07b0cb6821 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer 
Date: Wed, 8 Nov 2023 20:34:00 +0100 Subject: [PATCH 49/62] Move `get_parameter_values` and `fetch_parameters` to shared ena_std_lib --- ena_upload/json_parsing/ena_experiment.py | 53 ++--------------------- ena_upload/json_parsing/ena_std_lib.py | 48 ++++++++++++++++++++ 2 files changed, 52 insertions(+), 49 deletions(-) diff --git a/ena_upload/json_parsing/ena_experiment.py b/ena_upload/json_parsing/ena_experiment.py index d5695b6..c88dd91 100644 --- a/ena_upload/json_parsing/ena_experiment.py +++ b/ena_upload/json_parsing/ena_experiment.py @@ -6,13 +6,13 @@ fetch_assay_comment_by_name, get_assay_sample_associations, clip_off_prefix, + get_parameter_values, ) from ena_upload.json_parsing.characteristic import ( IsaBase, OtherMaterialCharacteristic, ParameterValue, ) -from ena_upload.json_parsing.ena_sample import EnaSample from ena_upload.json_parsing.other_material import OtherMaterial @@ -116,53 +116,6 @@ def get_derived_sample_alias( return assoc_sample_ids -def fetch_parameters(protocol_dict: Dict[str, str]) -> List[Dict[str, str]]: - """Fetches the parameters from a protocol dictionary. - - Args: - protocol_dict (Dict[str, str]): protocol dictionary - - Returns: - List[Dict[str, str]]: Resulting list of parameters - """ - parameters = [] - for protocol in protocol_dict: - for parameter in protocol["parameters"]: - parameters.append( - { - "id": parameter["@id"], - "name": parameter["parameterName"]["annotationValue"], - } - ) - return parameters - - -def get_parameter_values( - assay_stream: Dict[str, str], study_protocols_dict: Dict[str, str] -) -> Dict[str, str]: - """Returns all parameter values from a study dictionary. - - Args: - study_dict (Dict[str, str]): Input study dictionary - - Returns: - Dict[str, str]: Resulting dictionary of parameter values. 
- """ - param_vals = [] - parameters = fetch_parameters(study_protocols_dict) - for process in assay_stream["processSequence"]: - sample_ids = [clip_off_prefix(output["@id"]) for output in process["outputs"]] - parameter_values = [ - ParameterValue.from_dict(parameter_value, parameters) - for parameter_value in process["parameterValues"] - ] - for sample_id in sample_ids: - param_vals.append( - {"sample_id": sample_id, "parameter_values": parameter_values} - ) - return param_vals - - class EnaExperiment(IsaBase): """ Generates an Experiment object, compliant to the requirements of ENA @@ -222,7 +175,9 @@ def from_assay_stream( """ other_materials = get_other_materials(assay_stream) - parameter_values = get_parameter_values(assay_stream, protocols_dict) + parameter_values = get_parameter_values( + assay_stream["processSequence"], protocols_dict + ) prefix = fetch_assay_comment_by_name(assay_stream, EnaExperiment.prefix)[ "value" ] diff --git a/ena_upload/json_parsing/ena_std_lib.py b/ena_upload/json_parsing/ena_std_lib.py index 0448bd4..b6920aa 100644 --- a/ena_upload/json_parsing/ena_std_lib.py +++ b/ena_upload/json_parsing/ena_std_lib.py @@ -1,5 +1,53 @@ from typing import Dict, List, Union import re +from ena_upload.json_parsing.characteristic import ParameterValue + + +def fetch_parameters(protocol_dict: Dict[str, str]) -> List[Dict[str, str]]: + """Fetches the parameters from a protocol dictionary. + + Args: + protocol_dict (Dict[str, str]): protocol dictionary + + Returns: + List[Dict[str, str]]: Resulting list of parameters + """ + parameters = [] + for protocol in protocol_dict: + for parameter in protocol["parameters"]: + parameters.append( + { + "id": parameter["@id"], + "name": parameter["parameterName"]["annotationValue"], + } + ) + return parameters + + +def get_parameter_values( + process_sequence: Dict[str, str], study_protocols_dict: Dict[str, str] +) -> Dict[str, str]: + """Returns all parameter values from a study dictionary. 
+ + Args: + study_dict (Dict[str, str]): Input study dictionary + + Returns: + Dict[str, str]: Resulting dictionary of parameter values. + """ + param_vals = [] + parameters = fetch_parameters(study_protocols_dict) + for process in process_sequence: + sample_ids = [clip_off_prefix(output["@id"]) for output in process["outputs"]] + parameter_values = [ + ParameterValue.from_dict(parameter_value, parameters) + for parameter_value in process["parameterValues"] + ] + for sample_id in sample_ids: + param_vals.append( + {"sample_id": sample_id, "parameter_values": parameter_values} + ) + return param_vals def get_assay_sample_associations(assay_dict: Dict[str, str]) -> List[Dict[str, str]]: From cafcaad4363c6c84d1f433da42c1d1e33e5179bf Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Wed, 8 Nov 2023 21:31:16 +0100 Subject: [PATCH 50/62] Implementation of ParameterValues for samples --- ena_upload/json_parsing/ena_sample.py | 40 +- ..._multi_assay_stream_investigation_v3.json} | 2393 ++++++++++++++++- 2 files changed, 2417 insertions(+), 16 deletions(-) rename tests/test_data/{multi_study_multi_assay_stream_investigation_v2.json => multi_study_multi_assay_stream_investigation_v3.json} (68%) diff --git a/ena_upload/json_parsing/ena_sample.py b/ena_upload/json_parsing/ena_sample.py index ad071f5..37c326c 100644 --- a/ena_upload/json_parsing/ena_sample.py +++ b/ena_upload/json_parsing/ena_sample.py @@ -6,6 +6,7 @@ from ena_upload.json_parsing.ena_std_lib import ( clip_off_prefix, fetch_study_comment_by_name, + get_parameter_values, ) @@ -130,16 +131,33 @@ def from_study_dict(self, study_dict: Dict) -> None: } for source in study_dict["materials"]["sources"] ] - - samples_data = [ - { - "id": sample["@id"], - "name": sample["name"], - "characteristics": fetch_characteristics(sample, study_dict), - "source": associated_source(sample, study_dict), - } - for sample in study_dict["materials"]["samples"] - ] + parameter_values = get_parameter_values( + 
process_sequence=study_dict["processSequence"], + study_protocols_dict=study_dict["protocols"], + ) + + samples_data = [] + for sample in study_dict["materials"]["samples"]: + filtered_parameter_vals = list( + filter( + lambda pv: pv["sample_id"] == clip_off_prefix(sample["@id"]), + parameter_values, + ) + ) + parameter_vals = [] + for fpv in filtered_parameter_vals: + for pv in fpv["parameter_values"]: + parameter_vals.append(pv) + + samples_data.append( + { + "id": sample["@id"], + "name": sample["name"], + "characteristics": fetch_characteristics(sample, study_dict), + "parameter_values": parameter_vals, + "source": associated_source(sample, study_dict), + } + ) for sd in samples_data: for sc in associated_source_characteristics(sources_data, sd["source"]): @@ -152,7 +170,7 @@ def from_study_dict(self, study_dict: Dict) -> None: EnaSample( alias=sample_alias(sd["id"], study_alias_prefix), characteristics=sd["characteristics"], - parameter_values=[], # TODO Add functionality for parameter values in samples + parameter_values=sd["parameter_values"], ) for sd in samples_data ] diff --git a/tests/test_data/multi_study_multi_assay_stream_investigation_v2.json b/tests/test_data/multi_study_multi_assay_stream_investigation_v3.json similarity index 68% rename from tests/test_data/multi_study_multi_assay_stream_investigation_v2.json rename to tests/test_data/multi_study_multi_assay_stream_investigation_v3.json index 789bda2..a39c62b 100644 --- a/tests/test_data/multi_study_multi_assay_stream_investigation_v2.json +++ b/tests/test_data/multi_study_multi_assay_stream_investigation_v3.json @@ -9,7 +9,7 @@ "comments": [ { "name": "ISAjson export time", - "value": "2023-11-08T18:42:37Z" + "value": "2023-11-08T20:24:38Z" }, { "name": "SEEK Project name", @@ -67,12 +67,12 @@ "value": "https://datahub.elixir-belgium.org/samples/" }, { - "@id": "#study_comment/19_830dd160-6094-013c-9ed7-7a163e608de1", + "@id": "#study_comment/19_c394a480-60a2-013c-9ee3-7a163e608de1", "name": 
"SEEK Study ID", "value": "19" }, { - "@id": "#study_comment/19_830dd890-6094-013c-9ed8-7a163e608de1", + "@id": "#study_comment/19_c394aeb0-60a2-013c-9ee4-7a163e608de1", "name": "SEEK creation date", "value": "2023-10-09T13:39:46Z" } @@ -3744,12 +3744,12 @@ "value": "https://datahub.elixir-belgium.org/samples/" }, { - "@id": "#study_comment/20_836d8d10-6094-013c-9ed9-7a163e608de1", + "@id": "#study_comment/20_c4042c90-60a2-013c-9ee5-7a163e608de1", "name": "SEEK Study ID", "value": "20" }, { - "@id": "#study_comment/20_836d93c0-6094-013c-9eda-7a163e608de1", + "@id": "#study_comment/20_c40435b0-60a2-013c-9ee6-7a163e608de1", "name": "SEEK creation date", "value": "2023-10-09T14:34:40Z" } @@ -5061,6 +5061,2389 @@ ], "factors": [], "unitCategories": [] + }, + { + "identifier": "", + "title": "ENA Study 3", + "description": "", + "submissionDate": "", + "publicReleaseDate": "", + "filename": "ENA Study 3.txt", + "comments": [ + { + "@id": "#study_comment/27_16_49", + "name": "ena_sample_alias_prefix", + "value": "https://datahub.elixir-belgium.org/samples/" + }, + { + "@id": "#study_comment/27_c42d0d90-60a2-013c-9ee7-7a163e608de1", + "name": "SEEK Study ID", + "value": "27" + }, + { + "@id": "#study_comment/27_c42d1570-60a2-013c-9ee8-7a163e608de1", + "name": "SEEK creation date", + "value": "2023-11-08T20:00:56Z" + } + ], + "publications": [], + "people": [ + { + "@id": "#people/4", + "lastName": "DP", + "firstName": "Kevin", + "midInitials": "", + "email": "kevin.dp@mail.com", + "phone": "", + "fax": "", + "address": "", + "affiliation": "", + "roles": [ + { + "termAccession": "", + "termSource": "", + "annotationValue": "" + } + ], + "comments": [ + { + "@id": "", + "value": "", + "name": "" + } + ] + } + ], + "studyDesignDescriptors": [], + "characteristicCategories": [ + { + "@id": "#characteristic_category/geographic_location_(country_and/or_sea)_737", + "characteristicType": { + "annotationValue": "geographic location (country and/or sea)", + "termAccession": "", + 
"termSource": "" + } + }, + { + "@id": "#characteristic_category/taxon_id_738", + "characteristicType": { + "annotationValue": "taxon_id", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/title_742", + "characteristicType": { + "annotationValue": "title", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/sample_description_743", + "characteristicType": { + "annotationValue": "sample_description", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/collection_date_744", + "characteristicType": { + "annotationValue": "collection date", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/accession_745", + "characteristicType": { + "annotationValue": "accession", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/submission_date_746", + "characteristicType": { + "annotationValue": "submission date", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/status_747", + "characteristicType": { + "annotationValue": "status", + "termAccession": "", + "termSource": "" + } + } + ], + "materials": { + "sources": [ + { + "@id": "#source/236", + "name": "Source A", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/geographic_location_(country_and/or_sea)_737" + }, + "value": { + "annotationValue": "Antarctica", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/taxon_id_738" + }, + "value": { + "annotationValue": "254564", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + } + ], + "samples": [ + { + "@id": "#sample/237", + "name": "Sample 1", + "derivesFrom": [ + { + "@id": "#source/236" + } + ], + 
"characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_742" + }, + "value": { + "annotationValue": "Sample title 1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/sample_description_743" + }, + "value": { + "annotationValue": "This is sample 1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/collection_date_744" + }, + "value": { + "annotationValue": "2001", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_745" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_746" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_747" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "factorValues": [ + { + "category": { + "@id": "" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/238", + "name": "Sample 2", + "derivesFrom": [ + { + "@id": "#source/236" + } + ], + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_742" + }, + "value": { + "annotationValue": "Sample title 
2", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/sample_description_743" + }, + "value": { + "annotationValue": "This is sample 2", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/collection_date_744" + }, + "value": { + "annotationValue": "2002", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_745" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_746" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_747" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "factorValues": [ + { + "category": { + "@id": "" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/239", + "name": "Sample 3", + "derivesFrom": [ + { + "@id": "#source/236" + } + ], + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_742" + }, + "value": { + "annotationValue": "Sample title 3", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + 
"category": { + "@id": "#characteristic_category/sample_description_743" + }, + "value": { + "annotationValue": "This is sample 3", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/collection_date_744" + }, + "value": { + "annotationValue": "2003", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_745" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_746" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_747" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "factorValues": [ + { + "category": { + "@id": "" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + }, + { + "@id": "#sample/240", + "name": "Sample 4", + "derivesFrom": [ + { + "@id": "#source/236" + } + ], + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_742" + }, + "value": { + "annotationValue": "Sample title 4", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/sample_description_743" + }, + "value": { + "annotationValue": "This is sample 4", + 
"termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/collection_date_744" + }, + "value": { + "annotationValue": "2004", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_745" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_746" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_747" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "factorValues": [ + { + "category": { + "@id": "" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + } + ] + }, + "protocols": [ + { + "@id": "#protocol/_27", + "name": "sample collection", + "protocolType": { + "annotationValue": "sample collection", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [ + { + "@id": "#parameter/748", + "parameterName": { + "annotationValue": "Parameter Value 1", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/749", + "parameterName": { + "annotationValue": "Parameter Value 2", + "termAccession": "", + "termSource": "" + } + } + ], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + 
"termSource": "", + "termAccession": "" + } + } + ] + }, + { + "@id": "#protocol/_50", + "name": "library construction", + "protocolType": { + "annotationValue": "library construction", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [ + { + "@id": "#parameter/752", + "parameterName": { + "annotationValue": "library_construction_protocol", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/754", + "parameterName": { + "annotationValue": "design_description", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/756", + "parameterName": { + "annotationValue": "library_source", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/757", + "parameterName": { + "annotationValue": "library_strategy", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/758", + "parameterName": { + "annotationValue": "library_selection", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/759", + "parameterName": { + "annotationValue": "library_layout", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/760", + "parameterName": { + "annotationValue": "insert_size", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/761", + "parameterName": { + "annotationValue": "platform", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/762", + "parameterName": { + "annotationValue": "instrument_model", + "termAccession": "", + "termSource": "" + } + } + ], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + }, + { + "@id": "#protocol/_51", + "name": "nucleic acid sequencing", + "protocolType": { + "annotationValue": "nucleic acid sequencing", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": 
[], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + } + ], + "processSequence": [ + { + "@id": "#process/sample_collection/237_238", + "name": "", + "executesProtocol": { + "@id": "#protocol/_27" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/748" + }, + "value": { + "annotationValue": "A", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/749" + }, + "value": { + "annotationValue": "C", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "inputs": [ + { + "@id": "#source/236" + } + ], + "outputs": [ + { + "@id": "#sample/237" + }, + { + "@id": "#sample/238" + } + ] + }, + { + "@id": "#process/sample_collection/239", + "name": "", + "executesProtocol": { + "@id": "#protocol/_27" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/748" + }, + "value": { + "annotationValue": "B", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/749" + }, + "value": { + "annotationValue": "C", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "inputs": [ + { + "@id": "#source/236" + } + ], + "outputs": [ + { + "@id": "#sample/239" + } + ] + }, + { + "@id": "#process/sample_collection/240", + "name": "", + "executesProtocol": { + "@id": "#protocol/_27" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/748" + }, + "value": { + "annotationValue": "A", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } 
+ }, + { + "category": { + "@id": "#parameter/749" + }, + "value": { + "annotationValue": "D", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "inputs": [ + { + "@id": "#source/236" + } + ], + "outputs": [ + { + "@id": "#sample/240" + } + ] + } + ], + "assays": [ + { + "@id": "#assay/50_51", + "filename": "a_extra_assay_stream.txt", + "measurementType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "technologyType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "comments": [ + { + "@id": "#assay_comment/27_50_51_17_50", + "name": "ena_study_title", + "value": "Extra study" + }, + { + "@id": "#assay_comment/27_50_51_17_51", + "name": "study_type", + "value": "Whole Genome Sequencing" + }, + { + "@id": "#assay_comment/27_50_51_17_52", + "name": "new_study_type", + "value": "" + }, + { + "@id": "#assay_comment/27_50_51_17_53", + "name": "ena_study_abstract", + "value": "blablabla" + }, + { + "@id": "#assay_comment/27_50_51_17_54", + "name": "assay_stream", + "value": "Extra assay stream" + }, + { + "@id": "#assay_comment/27_50_51_17_55", + "name": "ena_study_alias_prefix", + "value": "https://datahub.elixir-belgium.org/studies/" + }, + { + "@id": "#assay_comment/27_50_51_17_56", + "name": "ena_experiment_alias_prefix", + "value": "https://datahub.elixir-belgium.org/samples/" + }, + { + "@id": "#assay_comment/27_50_51_17_57", + "name": "ena_run_alias_prefix", + "value": "https://datahub.elixir-belgium.org/samples/" + }, + { + "@id": "#assay_comment/50_51", + "name": "linked_assays", + "value": "[{\"id\":50,\"title\":\"Assay 1\"},{\"id\":51,\"title\":\"Assay 2\"}]" + } + ], + "technologyPlatform": "", + "characteristicCategories": [ + { + "@id": "#characteristic_category/title_753", + "characteristicType": { + "annotationValue": "title", + "termAccession": "", + "termSource": "" + } + }, + { + 
"@id": "#characteristic_category/accession_763", + "characteristicType": { + "annotationValue": "accession", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/submission_date_764", + "characteristicType": { + "annotationValue": "submission date", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/status_765", + "characteristicType": { + "annotationValue": "status", + "termAccession": "", + "termSource": "" + } + } + ], + "materials": { + "samples": [], + "otherMaterials": [ + { + "@id": "#other_material/241", + "name": "Library 1", + "type": "library_name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_753" + }, + "value": { + "annotationValue": "Library title 1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_763" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_764" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_765" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/237" + } + ] + }, + { + "@id": "#other_material/242", + "name": "Library 2", + "type": "library_name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_753" + }, + "value": { + "annotationValue": "Library title 2", + "termSource": "", + "termAccession": "" + }, + "unit": { + 
"termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_763" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_764" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_765" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/238" + } + ] + }, + { + "@id": "#other_material/243", + "name": "Library 3", + "type": "library_name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_753" + }, + "value": { + "annotationValue": "Library title 3", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_763" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_764" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_765" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/239" + } + ] 
+ }, + { + "@id": "#other_material/244", + "name": "Library 4", + "type": "library_name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_753" + }, + "value": { + "annotationValue": "Library title 4", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_763" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_764" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_765" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/240" + } + ] + }, + { + "@id": "#other_material/245", + "name": "Library 5", + "type": "library_name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/title_753" + }, + "value": { + "annotationValue": "Library title 5", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_763" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_764" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + 
}, + { + "category": { + "@id": "#characteristic_category/status_765" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#sample/240" + } + ] + } + ] + }, + "processSequence": [ + { + "@id": "#process/library_construction/241", + "name": "", + "executesProtocol": { + "@id": "#protocol/_50" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/752" + }, + "value": { + "annotationValue": "My library construction protocol", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/754" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/756" + }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/757" + }, + "value": { + "annotationValue": "WGS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/758" + }, + "value": { + "annotationValue": "PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/759" + }, + "value": { + "annotationValue": "PAIRED", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/760" + }, + "value": { + "annotationValue": "1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + 
"termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/761" + }, + "value": { + "annotationValue": "LS454", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/762" + }, + "value": { + "annotationValue": "454 GS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "inputs": [ + { + "@id": "#sample/237" + } + ], + "outputs": [ + { + "@id": "#other_material/241" + } + ], + "previousProcess": { + "@id": "#process/sample_collection/241" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/241" + } + }, + { + "@id": "#process/library_construction/242", + "name": "", + "executesProtocol": { + "@id": "#protocol/_50" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/752" + }, + "value": { + "annotationValue": "My library construction protocol", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/754" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/756" + }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/757" + }, + "value": { + "annotationValue": "WGS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/758" + }, + "value": { + "annotationValue": "PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + 
"termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/759" + }, + "value": { + "annotationValue": "SINGLE", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/760" + }, + "value": { + "annotationValue": "1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/761" + }, + "value": { + "annotationValue": "LS454", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/762" + }, + "value": { + "annotationValue": "454 GS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "inputs": [ + { + "@id": "#sample/238" + } + ], + "outputs": [ + { + "@id": "#other_material/242" + } + ], + "previousProcess": { + "@id": "#process/sample_collection/242" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/242" + } + }, + { + "@id": "#process/library_construction/243", + "name": "", + "executesProtocol": { + "@id": "#protocol/_50" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/752" + }, + "value": { + "annotationValue": "My library construction protocol", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/754" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/756" + }, + "value": { + "annotationValue": "GENOMIC", + "termSource": "", + "termAccession": "" + }, + 
"unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/757" + }, + "value": { + "annotationValue": "WGS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/758" + }, + "value": { + "annotationValue": "PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/759" + }, + "value": { + "annotationValue": "SINGLE", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/760" + }, + "value": { + "annotationValue": "1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/761" + }, + "value": { + "annotationValue": "LS454", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/762" + }, + "value": { + "annotationValue": "454 GS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "inputs": [ + { + "@id": "#sample/239" + } + ], + "outputs": [ + { + "@id": "#other_material/243" + } + ], + "previousProcess": { + "@id": "#process/sample_collection/243" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/243" + } + }, + { + "@id": "#process/library_construction/244_245", + "name": "", + "executesProtocol": { + "@id": "#protocol/_50" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/752" + }, + "value": { + "annotationValue": "My library construction protocol", + "termSource": "", + 
"termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/754" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/756" + }, + "value": { + "annotationValue": "METAGENOMIC", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/757" + }, + "value": { + "annotationValue": "WGS", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/758" + }, + "value": { + "annotationValue": "PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/759" + }, + "value": { + "annotationValue": "SINGLE", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/760" + }, + "value": { + "annotationValue": "1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/761" + }, + "value": { + "annotationValue": "Illumina", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/762" + }, + "value": { + "annotationValue": "Illumina Genome Analyzer II", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "inputs": [ + { + "@id": "#sample/240" + } + ], + 
"outputs": [ + { + "@id": "#other_material/244" + }, + { + "@id": "#other_material/245" + } + ], + "previousProcess": { + "@id": "#process/sample_collection/244" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/244" + } + }, + { + "@id": "#process/nucleic_acid_sequencing/246_247", + "name": "", + "executesProtocol": { + "@id": "#protocol/_51" + }, + "parameterValues": [], + "performer": "", + "date": "", + "inputs": [ + { + "@id": "#other_material/241" + } + ], + "outputs": [ + { + "@id": "#data_file/246" + }, + { + "@id": "#data_file/247" + } + ], + "previousProcess": { + "@id": "#process/library_construction/246" + }, + "nextProcess": {} + }, + { + "@id": "#process/nucleic_acid_sequencing/248", + "name": "", + "executesProtocol": { + "@id": "#protocol/_51" + }, + "parameterValues": [], + "performer": "", + "date": "", + "inputs": [ + { + "@id": "#other_material/242" + } + ], + "outputs": [ + { + "@id": "#data_file/248" + } + ], + "previousProcess": { + "@id": "#process/library_construction/248" + }, + "nextProcess": {} + }, + { + "@id": "#process/nucleic_acid_sequencing/249", + "name": "", + "executesProtocol": { + "@id": "#protocol/_51" + }, + "parameterValues": [], + "performer": "", + "date": "", + "inputs": [ + { + "@id": "#other_material/243" + } + ], + "outputs": [ + { + "@id": "#data_file/249" + } + ], + "previousProcess": { + "@id": "#process/library_construction/249" + }, + "nextProcess": {} + }, + { + "@id": "#process/nucleic_acid_sequencing/250", + "name": "", + "executesProtocol": { + "@id": "#protocol/_51" + }, + "parameterValues": [], + "performer": "", + "date": "", + "inputs": [ + { + "@id": "#other_material/244" + } + ], + "outputs": [ + { + "@id": "#data_file/250" + } + ], + "previousProcess": { + "@id": "#process/library_construction/250" + }, + "nextProcess": {} + }, + { + "@id": "#process/nucleic_acid_sequencing/251", + "name": "", + "executesProtocol": { + "@id": "#protocol/_51" + }, + "parameterValues": [], + "performer": 
"", + "date": "", + "inputs": [ + { + "@id": "#other_material/245" + } + ], + "outputs": [ + { + "@id": "#data_file/251" + } + ], + "previousProcess": { + "@id": "#process/library_construction/251" + }, + "nextProcess": {} + } + ], + "dataFiles": [ + { + "@id": "#data_file/246", + "name": "data file 1", + "type": "Raw Data File", + "comments": [ + { + "name": "file_name", + "value": "data_file_1_A.fastq" + }, + { + "name": "file_type", + "value": "fastq" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "accession", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + } + ] + }, + { + "@id": "#data_file/247", + "name": "data file 2", + "type": "Raw Data File", + "comments": [ + { + "name": "file_name", + "value": "data_file_1_B.fastq" + }, + { + "name": "file_type", + "value": "fastq" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "accession", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + } + ] + }, + { + "@id": "#data_file/248", + "name": "data file 3", + "type": "Raw Data File", + "comments": [ + { + "name": "file_name", + "value": "data_file_3.fastq" + }, + { + "name": "file_type", + "value": "fastq" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "accession", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + } + ] + }, + { + "@id": "#data_file/249", + "name": "data file 4", + "type": "Raw Data File", + "comments": [ + { + "name": "file_name", + "value": "data_file_4.fastq" + }, + { + "name": "file_type", + "value": "fastq" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "accession", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + } + ] + }, + { + "@id": "#data_file/250", + "name": "data file 5", + "type": "Raw Data File", + "comments": [ 
+ { + "name": "file_name", + "value": "data_file_5.fastq" + }, + { + "name": "file_type", + "value": "fastq" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "accession", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + } + ] + }, + { + "@id": "#data_file/251", + "name": "data file 6", + "type": "Raw Data File", + "comments": [ + { + "name": "file_name", + "value": "data_file_6.fastq" + }, + { + "name": "file_type", + "value": "fastq" + }, + { + "name": "file checksum", + "value": "" + }, + { + "name": "accession", + "value": "" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + } + ] + } + ], + "unitCategories": [] + } + ], + "factors": [], + "unitCategories": [] } ] } \ No newline at end of file From 6e5e393d5c7068c593f2f552adc38de0c6fca124 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Thu, 9 Nov 2023 09:27:44 +0100 Subject: [PATCH 51/62] Replace NaN in dataframes by empty string --- ena_upload/json_parsing/ena_submission.py | 8 +- example_read_isa_json.ipynb | 633 ++++++++++++++++++---- 2 files changed, 533 insertions(+), 108 deletions(-) diff --git a/ena_upload/json_parsing/ena_submission.py b/ena_upload/json_parsing/ena_submission.py index 9ab92bb..ceadfb1 100644 --- a/ena_upload/json_parsing/ena_submission.py +++ b/ena_upload/json_parsing/ena_submission.py @@ -142,8 +142,8 @@ def generate_dataframes(self) -> Dict[str, DataFrame]: Dict[str, DataFrame]: resulting dictionary of DataFrames """ return { - "study": export_studies_to_dataframe(self.studies), - "sample": export_samples_to_dataframe(self.samples), - "experiment": export_experiments_to_dataframe(self.experiments), - "run": export_runs_to_dataframe(self.runs), + "study": export_studies_to_dataframe(self.studies).fillna(""), + "sample": export_samples_to_dataframe(self.samples).fillna(""), + "experiment": export_experiments_to_dataframe(self.experiments).fillna(""), + 
"run": export_runs_to_dataframe(self.runs).fillna(""), } diff --git a/example_read_isa_json.ipynb b/example_read_isa_json.ipynb index 5da83f6..6f1d6bc 100644 --- a/example_read_isa_json.ipynb +++ b/example_read_isa_json.ipynb @@ -42,7 +42,7 @@ "\n", "# Read json file\n", "isa_json_file = open(\n", - " \"tests/test_data/simple_test_case_v2.json\"\n", + " \"tests/test_data/multi_study_multi_assay_stream_investigation_v3.json\"\n", ")\n", "isa_json = json.load(isa_json_file)" ] @@ -68,6 +68,7 @@ "\n", "required_assays = [\n", " {\"assay_stream\": \"Ena stream 1\"},\n", + " {\"ena_study_title\": \"Extra study\"}\n", " ]" ] }, @@ -141,11 +142,20 @@ " \n", " \n", " 0\n", - " study_alias_48_49\n", - " study_title_1\n", - " Transcriptome Analysis\n", - " study_abstract_1\n", - " None\n", + " https://datahub.elixir-belgium.org/studies/27_28\n", + " Ena Study 1\n", + " Whole Genome Sequencing\n", + " This is Ena Study 1.\n", + " \n", + " 5,6\n", + " \n", + " \n", + " 1\n", + " https://datahub.elixir-belgium.org/studies/50_51\n", + " Extra study\n", + " Whole Genome Sequencing\n", + " blablabla\n", + " \n", " \n", " \n", " \n", @@ -153,11 +163,13 @@ "" ], "text/plain": [ - " alias title study_type study_abstract \\\n", - "0 study_alias_48_49 study_title_1 Transcriptome Analysis study_abstract_1 \n", + " alias title \\\n", + "0 https://datahub.elixir-belgium.org/studies/27_28 Ena Study 1 \n", + "1 https://datahub.elixir-belgium.org/studies/50_51 Extra study \n", "\n", - " new_study_type pubmed_id \n", - "0 None " + " study_type study_abstract new_study_type pubmed_id \n", + "0 Whole Genome Sequencing This is Ena Study 1. 
5,6 \n", + "1 Whole Genome Sequencing blablabla " ] }, "metadata": {}, @@ -200,49 +212,167 @@ " status\n", " geographic location (country and/or sea)\n", " taxon_id\n", + " Parameter Value 1\n", + " Parameter Value 2\n", " \n", " \n", " \n", " \n", " 0\n", - " sample_alias_224\n", - " sample_title_1\n", - " sample_description_1\n", - " 2020-10-11\n", + " https://datahub.elixir-belgium.org/samples/142\n", + " Sample title 1\n", + " Sample description 1\n", + " 2023\n", + " \n", + " \n", + " \n", + " Afghanistan\n", + " 1234\n", " \n", " \n", - " add\n", - " Argentina\n", - " 9606\n", " \n", " \n", " 1\n", - " sample_alias_225\n", - " sample_title_2\n", - " sample_description_2\n", - " 2008-01-24\n", + " https://datahub.elixir-belgium.org/samples/143\n", + " Sample title 2\n", + " Sample description 2\n", + " 2022\n", + " \n", + " \n", + " \n", + " Afghanistan\n", + " 1234\n", + " \n", + " \n", + " \n", + " \n", + " 2\n", + " https://datahub.elixir-belgium.org/samples/144\n", + " Sample title 3\n", + " Sample description 3\n", + " 2021\n", + " \n", + " \n", + " \n", + " Albania\n", + " 2345\n", + " \n", + " \n", + " \n", + " \n", + " 3\n", + " https://datahub.elixir-belgium.org/samples/145\n", + " Sample title 4\n", + " Sample description 4\n", + " 2020\n", + " \n", + " \n", + " \n", + " Albania\n", + " 2345\n", " \n", " \n", - " add\n", - " Belgium\n", - " 646099\n", + " \n", + " \n", + " 4\n", + " https://datahub.elixir-belgium.org/samples/237\n", + " Sample title 1\n", + " This is sample 1\n", + " 2001\n", + " \n", + " \n", + " \n", + " Antarctica\n", + " 254564\n", + " A\n", + " C\n", + " \n", + " \n", + " 5\n", + " https://datahub.elixir-belgium.org/samples/238\n", + " Sample title 2\n", + " This is sample 2\n", + " 2002\n", + " \n", + " \n", + " \n", + " Antarctica\n", + " 254564\n", + " A\n", + " C\n", + " \n", + " \n", + " 6\n", + " https://datahub.elixir-belgium.org/samples/239\n", + " Sample title 3\n", + " This is sample 3\n", + " 2003\n", + " \n", + " 
\n", + " \n", + " Antarctica\n", + " 254564\n", + " B\n", + " C\n", + " \n", + " \n", + " 7\n", + " https://datahub.elixir-belgium.org/samples/240\n", + " Sample title 4\n", + " This is sample 4\n", + " 2004\n", + " \n", + " \n", + " \n", + " Antarctica\n", + " 254564\n", + " A\n", + " D\n", " \n", " \n", "\n", "" ], "text/plain": [ - " alias title sample_description collection date \\\n", - "0 sample_alias_224 sample_title_1 sample_description_1 2020-10-11 \n", - "1 sample_alias_225 sample_title_2 sample_description_2 2008-01-24 \n", + " alias title \\\n", + "0 https://datahub.elixir-belgium.org/samples/142 Sample title 1 \n", + "1 https://datahub.elixir-belgium.org/samples/143 Sample title 2 \n", + "2 https://datahub.elixir-belgium.org/samples/144 Sample title 3 \n", + "3 https://datahub.elixir-belgium.org/samples/145 Sample title 4 \n", + "4 https://datahub.elixir-belgium.org/samples/237 Sample title 1 \n", + "5 https://datahub.elixir-belgium.org/samples/238 Sample title 2 \n", + "6 https://datahub.elixir-belgium.org/samples/239 Sample title 3 \n", + "7 https://datahub.elixir-belgium.org/samples/240 Sample title 4 \n", + "\n", + " sample_description collection date accession submission date status \\\n", + "0 Sample description 1 2023 \n", + "1 Sample description 2 2022 \n", + "2 Sample description 3 2021 \n", + "3 Sample description 4 2020 \n", + "4 This is sample 1 2001 \n", + "5 This is sample 2 2002 \n", + "6 This is sample 3 2003 \n", + "7 This is sample 4 2004 \n", "\n", - " accession submission date status geographic location (country and/or sea) \\\n", - "0 add Argentina \n", - "1 add Belgium \n", + " geographic location (country and/or sea) taxon_id Parameter Value 1 \\\n", + "0 Afghanistan 1234 \n", + "1 Afghanistan 1234 \n", + "2 Albania 2345 \n", + "3 Albania 2345 \n", + "4 Antarctica 254564 A \n", + "5 Antarctica 254564 A \n", + "6 Antarctica 254564 B \n", + "7 Antarctica 254564 A \n", "\n", - " taxon_id \n", - "0 9606 \n", - "1 646099 " + " 
Parameter Value 2 \n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 C \n", + "5 C \n", + "6 C \n", + "7 D " ] }, "metadata": {}, @@ -284,6 +414,7 @@ " accession\n", " submission date\n", " status\n", + " library_construction_protocol\n", " design_description\n", " library_source\n", " library_strategy\n", @@ -297,85 +428,276 @@ " \n", " \n", " 0\n", - " experiment_alias_227\n", - " study_alias_48_49\n", - " sample_alias_224\n", - " library_name_1\n", - " experiment_title_1\n", + " https://datahub.elixir-belgium.org/samples/146\n", + " https://datahub.elixir-belgium.org/studies/27_28\n", + " https://datahub.elixir-belgium.org/samples/142\n", + " Library 1\n", + " Library title 1\n", + " \n", " \n", " \n", - " add\n", - " design_description_1\n", + " My special protocol 1\n", + " Library description 1\n", " GENOMIC\n", - " WGA\n", + " WGS\n", " RANDOM\n", - " PAIRED\n", - " 250\n", + " SINGLE\n", + " 123\n", " LS454\n", - " 454 GS 20\n", + " 454 GS\n", " \n", " \n", " 1\n", - " experiment_alias_228\n", - " study_alias_48_49\n", - " sample_alias_225\n", - " library_name_2\n", - " experiment_title_2\n", + " https://datahub.elixir-belgium.org/samples/147\n", + " https://datahub.elixir-belgium.org/studies/27_28\n", + " https://datahub.elixir-belgium.org/samples/143\n", + " Library 2\n", + " Library title 2\n", " \n", " \n", - " add\n", - " design_description_2\n", - " TRANSCRIPTOMIC\n", - " RNA-Seq\n", - " repeat fractionation\n", - " SINGLE\n", - " None\n", + " \n", + " My special protocol 2\n", + " Library description 2\n", + " GENOMIC SINGLE CELL\n", + " WGA\n", + " PCR\n", + " PAIRED\n", + " 234\n", " Illumina\n", " Illumina Genome Analyzer\n", " \n", " \n", " 2\n", - " experiment_alias_229\n", - " study_alias_48_49\n", - " sample_alias_225\n", - " library_name_2\n", - " experiment_title_3\n", + " https://datahub.elixir-belgium.org/samples/148\n", + " https://datahub.elixir-belgium.org/studies/27_28\n", + " https://datahub.elixir-belgium.org/samples/144\n", + " 
Library 3\n", + " Library title 3\n", + " \n", " \n", " \n", - " add\n", - " design_description_3\n", + " My special protocol 3\n", + " Library description 3\n", " TRANSCRIPTOMIC\n", + " WXS\n", + " RANDOM PCR\n", + " SINGLE\n", + " 345\n", + " PacBio\n", + " PacBio RS\n", + " \n", + " \n", + " 3\n", + " https://datahub.elixir-belgium.org/samples/149\n", + " https://datahub.elixir-belgium.org/studies/27_28\n", + " https://datahub.elixir-belgium.org/samples/145\n", + " Library 4\n", + " Library title 4\n", + " \n", + " \n", + " \n", + " My special protocol 4\n", + " Library description 4\n", + " TRANSCRIPTOMIC SINGLE CELL\n", " RNA-Seq\n", - " repeat fractionation\n", + " RT-PCR\n", + " PAIRED\n", + " 456\n", + " Themo Fisher Scientific\n", + " AB 3730xL Genetic Analyzer\n", + " \n", + " \n", + " 4\n", + " https://datahub.elixir-belgium.org/samples/241\n", + " https://datahub.elixir-belgium.org/studies/50_51\n", + " https://datahub.elixir-belgium.org/samples/237\n", + " Library 1\n", + " Library title 1\n", + " \n", + " \n", + " \n", + " My library construction protocol\n", + " \n", + " GENOMIC\n", + " WGS\n", + " PCR\n", + " PAIRED\n", + " 1\n", + " LS454\n", + " 454 GS\n", + " \n", + " \n", + " 5\n", + " https://datahub.elixir-belgium.org/samples/242\n", + " https://datahub.elixir-belgium.org/studies/50_51\n", + " https://datahub.elixir-belgium.org/samples/238\n", + " Library 2\n", + " Library title 2\n", + " \n", + " \n", + " \n", + " My library construction protocol\n", + " \n", + " GENOMIC\n", + " WGS\n", + " PCR\n", " SINGLE\n", - " None\n", + " 1\n", + " LS454\n", + " 454 GS\n", + " \n", + " \n", + " 6\n", + " https://datahub.elixir-belgium.org/samples/243\n", + " https://datahub.elixir-belgium.org/studies/50_51\n", + " https://datahub.elixir-belgium.org/samples/239\n", + " Library 3\n", + " Library title 3\n", + " \n", + " \n", + " \n", + " My library construction protocol\n", + " \n", + " GENOMIC\n", + " WGS\n", + " PCR\n", + " SINGLE\n", + " 1\n", + " 
LS454\n", + " 454 GS\n", + " \n", + " \n", + " 7\n", + " https://datahub.elixir-belgium.org/samples/244\n", + " https://datahub.elixir-belgium.org/studies/50_51\n", + " https://datahub.elixir-belgium.org/samples/240\n", + " Library 4\n", + " Library title 4\n", + " \n", + " \n", + " \n", + " My library construction protocol\n", + " \n", + " METAGENOMIC\n", + " WGS\n", + " PCR\n", + " SINGLE\n", + " 1\n", " Illumina\n", - " Illumina Genome Analyzer\n", + " Illumina Genome Analyzer II\n", + " \n", + " \n", + " 8\n", + " https://datahub.elixir-belgium.org/samples/245\n", + " https://datahub.elixir-belgium.org/studies/50_51\n", + " https://datahub.elixir-belgium.org/samples/240\n", + " Library 5\n", + " Library title 5\n", + " \n", + " \n", + " \n", + " My library construction protocol\n", + " \n", + " METAGENOMIC\n", + " WGS\n", + " PCR\n", + " SINGLE\n", + " 1\n", + " Illumina\n", + " Illumina Genome Analyzer II\n", " \n", " \n", "\n", "" ], "text/plain": [ - " alias study_alias sample_alias library_name \\\n", - "0 experiment_alias_227 study_alias_48_49 sample_alias_224 library_name_1 \n", - "1 experiment_alias_228 study_alias_48_49 sample_alias_225 library_name_2 \n", - "2 experiment_alias_229 study_alias_48_49 sample_alias_225 library_name_2 \n", + " alias \\\n", + "0 https://datahub.elixir-belgium.org/samples/146 \n", + "1 https://datahub.elixir-belgium.org/samples/147 \n", + "2 https://datahub.elixir-belgium.org/samples/148 \n", + "3 https://datahub.elixir-belgium.org/samples/149 \n", + "4 https://datahub.elixir-belgium.org/samples/241 \n", + "5 https://datahub.elixir-belgium.org/samples/242 \n", + "6 https://datahub.elixir-belgium.org/samples/243 \n", + "7 https://datahub.elixir-belgium.org/samples/244 \n", + "8 https://datahub.elixir-belgium.org/samples/245 \n", + "\n", + " study_alias \\\n", + "0 https://datahub.elixir-belgium.org/studies/27_28 \n", + "1 https://datahub.elixir-belgium.org/studies/27_28 \n", + "2 
https://datahub.elixir-belgium.org/studies/27_28 \n", + "3 https://datahub.elixir-belgium.org/studies/27_28 \n", + "4 https://datahub.elixir-belgium.org/studies/50_51 \n", + "5 https://datahub.elixir-belgium.org/studies/50_51 \n", + "6 https://datahub.elixir-belgium.org/studies/50_51 \n", + "7 https://datahub.elixir-belgium.org/studies/50_51 \n", + "8 https://datahub.elixir-belgium.org/studies/50_51 \n", + "\n", + " sample_alias library_name \\\n", + "0 https://datahub.elixir-belgium.org/samples/142 Library 1 \n", + "1 https://datahub.elixir-belgium.org/samples/143 Library 2 \n", + "2 https://datahub.elixir-belgium.org/samples/144 Library 3 \n", + "3 https://datahub.elixir-belgium.org/samples/145 Library 4 \n", + "4 https://datahub.elixir-belgium.org/samples/237 Library 1 \n", + "5 https://datahub.elixir-belgium.org/samples/238 Library 2 \n", + "6 https://datahub.elixir-belgium.org/samples/239 Library 3 \n", + "7 https://datahub.elixir-belgium.org/samples/240 Library 4 \n", + "8 https://datahub.elixir-belgium.org/samples/240 Library 5 \n", "\n", - " title accession submission date status design_description \\\n", - "0 experiment_title_1 add design_description_1 \n", - "1 experiment_title_2 add design_description_2 \n", - "2 experiment_title_3 add design_description_3 \n", + " title accession submission date status \\\n", + "0 Library title 1 \n", + "1 Library title 2 \n", + "2 Library title 3 \n", + "3 Library title 4 \n", + "4 Library title 1 \n", + "5 Library title 2 \n", + "6 Library title 3 \n", + "7 Library title 4 \n", + "8 Library title 5 \n", "\n", - " library_source library_strategy library_selection library_layout \\\n", - "0 GENOMIC WGA RANDOM PAIRED \n", - "1 TRANSCRIPTOMIC RNA-Seq repeat fractionation SINGLE \n", - "2 TRANSCRIPTOMIC RNA-Seq repeat fractionation SINGLE \n", + " library_construction_protocol design_description \\\n", + "0 My special protocol 1 Library description 1 \n", + "1 My special protocol 2 Library description 2 \n", + "2 My 
special protocol 3 Library description 3 \n", + "3 My special protocol 4 Library description 4 \n", + "4 My library construction protocol \n", + "5 My library construction protocol \n", + "6 My library construction protocol \n", + "7 My library construction protocol \n", + "8 My library construction protocol \n", "\n", - " insert_size platform instrument_model \n", - "0 250 LS454 454 GS 20 \n", - "1 None Illumina Illumina Genome Analyzer \n", - "2 None Illumina Illumina Genome Analyzer " + " library_source library_strategy library_selection \\\n", + "0 GENOMIC WGS RANDOM \n", + "1 GENOMIC SINGLE CELL WGA PCR \n", + "2 TRANSCRIPTOMIC WXS RANDOM PCR \n", + "3 TRANSCRIPTOMIC SINGLE CELL RNA-Seq RT-PCR \n", + "4 GENOMIC WGS PCR \n", + "5 GENOMIC WGS PCR \n", + "6 GENOMIC WGS PCR \n", + "7 METAGENOMIC WGS PCR \n", + "8 METAGENOMIC WGS PCR \n", + "\n", + " library_layout insert_size platform \\\n", + "0 SINGLE 123 LS454 \n", + "1 PAIRED 234 Illumina \n", + "2 SINGLE 345 PacBio \n", + "3 PAIRED 456 Themo Fisher Scientific \n", + "4 PAIRED 1 LS454 \n", + "5 SINGLE 1 LS454 \n", + "6 SINGLE 1 LS454 \n", + "7 SINGLE 1 Illumina \n", + "8 SINGLE 1 Illumina \n", + "\n", + " instrument_model \n", + "0 454 GS \n", + "1 Illumina Genome Analyzer \n", + "2 PacBio RS \n", + "3 AB 3730xL Genetic Analyzer \n", + "4 454 GS \n", + "5 454 GS \n", + "6 454 GS \n", + "7 Illumina Genome Analyzer II \n", + "8 Illumina Genome Analyzer II " ] }, "metadata": {}, @@ -422,51 +744,154 @@ " \n", " \n", " 0\n", - " run_alias_233_234\n", - " experiment_alias_227\n", - " ENA_TEST2.R1.fastq.gz\n", - " fastq\n", + " https://datahub.elixir-belgium.org/samples/150\n", + " https://datahub.elixir-belgium.org/samples/146\n", + " data_file_1.bam\n", + " bam\n", + " \n", " \n", " \n", " \n", - " add\n", " \n", " \n", " 1\n", - " run_alias_233_234\n", - " experiment_alias_227\n", - " ENA_TEST2.R2.fastq.gz\n", - " fastq\n", + " https://datahub.elixir-belgium.org/samples/151\n", + " 
https://datahub.elixir-belgium.org/samples/147\n", + " data_file_2.cram\n", + " cram\n", + " \n", " \n", " \n", " \n", - " add\n", " \n", " \n", " 2\n", - " run_alias_235\n", - " experiment_alias_229\n", - " ENA_TEST1.R1.fastq.gz\n", + " https://datahub.elixir-belgium.org/samples/152\n", + " https://datahub.elixir-belgium.org/samples/148\n", + " data_file_3.fastq\n", + " fastq\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 3\n", + " https://datahub.elixir-belgium.org/samples/153\n", + " https://datahub.elixir-belgium.org/samples/149\n", + " data_file_4.sff\n", + " sff\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 4\n", + " https://datahub.elixir-belgium.org/samples/246...\n", + " https://datahub.elixir-belgium.org/samples/241\n", + " data_file_1_A.fastq\n", " fastq\n", " \n", " \n", " \n", - " add\n", + " \n", + " \n", + " \n", + " 5\n", + " https://datahub.elixir-belgium.org/samples/246...\n", + " https://datahub.elixir-belgium.org/samples/241\n", + " data_file_1_B.fastq\n", + " fastq\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 6\n", + " https://datahub.elixir-belgium.org/samples/248\n", + " https://datahub.elixir-belgium.org/samples/242\n", + " data_file_3.fastq\n", + " fastq\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 7\n", + " https://datahub.elixir-belgium.org/samples/249\n", + " https://datahub.elixir-belgium.org/samples/243\n", + " data_file_4.fastq\n", + " fastq\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 8\n", + " https://datahub.elixir-belgium.org/samples/250\n", + " https://datahub.elixir-belgium.org/samples/244\n", + " data_file_5.fastq\n", + " fastq\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 9\n", + " https://datahub.elixir-belgium.org/samples/251\n", + " https://datahub.elixir-belgium.org/samples/245\n", + " data_file_6.fastq\n", + " fastq\n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "\n", "" ], "text/plain": [ - " alias experiment_alias 
file_name file_type \\\n", - "0 run_alias_233_234 experiment_alias_227 ENA_TEST2.R1.fastq.gz fastq \n", - "1 run_alias_233_234 experiment_alias_227 ENA_TEST2.R2.fastq.gz fastq \n", - "2 run_alias_235 experiment_alias_229 ENA_TEST1.R1.fastq.gz fastq \n", + " alias \\\n", + "0 https://datahub.elixir-belgium.org/samples/150 \n", + "1 https://datahub.elixir-belgium.org/samples/151 \n", + "2 https://datahub.elixir-belgium.org/samples/152 \n", + "3 https://datahub.elixir-belgium.org/samples/153 \n", + "4 https://datahub.elixir-belgium.org/samples/246... \n", + "5 https://datahub.elixir-belgium.org/samples/246... \n", + "6 https://datahub.elixir-belgium.org/samples/248 \n", + "7 https://datahub.elixir-belgium.org/samples/249 \n", + "8 https://datahub.elixir-belgium.org/samples/250 \n", + "9 https://datahub.elixir-belgium.org/samples/251 \n", + "\n", + " experiment_alias file_name \\\n", + "0 https://datahub.elixir-belgium.org/samples/146 data_file_1.bam \n", + "1 https://datahub.elixir-belgium.org/samples/147 data_file_2.cram \n", + "2 https://datahub.elixir-belgium.org/samples/148 data_file_3.fastq \n", + "3 https://datahub.elixir-belgium.org/samples/149 data_file_4.sff \n", + "4 https://datahub.elixir-belgium.org/samples/241 data_file_1_A.fastq \n", + "5 https://datahub.elixir-belgium.org/samples/241 data_file_1_B.fastq \n", + "6 https://datahub.elixir-belgium.org/samples/242 data_file_3.fastq \n", + "7 https://datahub.elixir-belgium.org/samples/243 data_file_4.fastq \n", + "8 https://datahub.elixir-belgium.org/samples/244 data_file_5.fastq \n", + "9 https://datahub.elixir-belgium.org/samples/245 data_file_6.fastq \n", "\n", - " file checksum accession submission date status \n", - "0 add \n", - "1 add \n", - "2 add " + " file_type file checksum accession submission date status \n", + "0 bam \n", + "1 cram \n", + "2 fastq \n", + "3 sff \n", + "4 fastq \n", + "5 fastq \n", + "6 fastq \n", + "7 fastq \n", + "8 fastq \n", + "9 fastq " ] }, "metadata": {}, From 
addc64d88de1803c51b395dfa84c6e91d4e21361 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer <82407142+kdp-cloud@users.noreply.github.com> Date: Thu, 9 Nov 2023 08:31:45 +0000 Subject: [PATCH 52/62] Remove example python notebook --- example_read_isa_json.ipynb | 977 ------------------------------------ 1 file changed, 977 deletions(-) delete mode 100644 example_read_isa_json.ipynb diff --git a/example_read_isa_json.ipynb b/example_read_isa_json.ipynb deleted file mode 100644 index 6f1d6bc..0000000 --- a/example_read_isa_json.ipynb +++ /dev/null @@ -1,977 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Example Script for parsing an ISA JSON" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Import statements" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import os\n", - "from ena_upload.json_parsing.ena_submission import EnaSubmission\n", - "from dotenv import dotenv_values\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Reading a JSON file" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "# Read json file\n", - "isa_json_file = open(\n", - " \"tests/test_data/multi_study_multi_assay_stream_investigation_v3.json\"\n", - ")\n", - "isa_json = json.load(isa_json_file)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Setting some extra parameters" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "\n", - "# Change this to 'True' if you want to export the resulting DataFrames to an xlsx.\n", - "export_to_excel = False\n", - "outputfolder = \"./output_folder/\"\n", - "\n", - "required_assays = [\n", - " {\"assay_stream\": \"Ena stream 1\"},\n", - " {\"ena_study_title\": \"Extra study\"}\n", - " ]" - ] - }, - { - 
"cell_type": "markdown", - "metadata": {}, - "source": [ - "## Parsing" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "\n", - "submission = EnaSubmission.from_isa_json(isa_json, required_assays)\n", - "submission_dfs = submission.generate_dataframes()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Output" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dataframe study:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
aliastitlestudy_typestudy_abstractnew_study_typepubmed_id
0https://datahub.elixir-belgium.org/studies/27_28Ena Study 1Whole Genome SequencingThis is Ena Study 1.5,6
1https://datahub.elixir-belgium.org/studies/50_51Extra studyWhole Genome Sequencingblablabla
\n", - "
" - ], - "text/plain": [ - " alias title \\\n", - "0 https://datahub.elixir-belgium.org/studies/27_28 Ena Study 1 \n", - "1 https://datahub.elixir-belgium.org/studies/50_51 Extra study \n", - "\n", - " study_type study_abstract new_study_type pubmed_id \n", - "0 Whole Genome Sequencing This is Ena Study 1. 5,6 \n", - "1 Whole Genome Sequencing blablabla " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dataframe sample:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
aliastitlesample_descriptioncollection dateaccessionsubmission datestatusgeographic location (country and/or sea)taxon_idParameter Value 1Parameter Value 2
0https://datahub.elixir-belgium.org/samples/142Sample title 1Sample description 12023Afghanistan1234
1https://datahub.elixir-belgium.org/samples/143Sample title 2Sample description 22022Afghanistan1234
2https://datahub.elixir-belgium.org/samples/144Sample title 3Sample description 32021Albania2345
3https://datahub.elixir-belgium.org/samples/145Sample title 4Sample description 42020Albania2345
4https://datahub.elixir-belgium.org/samples/237Sample title 1This is sample 12001Antarctica254564AC
5https://datahub.elixir-belgium.org/samples/238Sample title 2This is sample 22002Antarctica254564AC
6https://datahub.elixir-belgium.org/samples/239Sample title 3This is sample 32003Antarctica254564BC
7https://datahub.elixir-belgium.org/samples/240Sample title 4This is sample 42004Antarctica254564AD
\n", - "
" - ], - "text/plain": [ - " alias title \\\n", - "0 https://datahub.elixir-belgium.org/samples/142 Sample title 1 \n", - "1 https://datahub.elixir-belgium.org/samples/143 Sample title 2 \n", - "2 https://datahub.elixir-belgium.org/samples/144 Sample title 3 \n", - "3 https://datahub.elixir-belgium.org/samples/145 Sample title 4 \n", - "4 https://datahub.elixir-belgium.org/samples/237 Sample title 1 \n", - "5 https://datahub.elixir-belgium.org/samples/238 Sample title 2 \n", - "6 https://datahub.elixir-belgium.org/samples/239 Sample title 3 \n", - "7 https://datahub.elixir-belgium.org/samples/240 Sample title 4 \n", - "\n", - " sample_description collection date accession submission date status \\\n", - "0 Sample description 1 2023 \n", - "1 Sample description 2 2022 \n", - "2 Sample description 3 2021 \n", - "3 Sample description 4 2020 \n", - "4 This is sample 1 2001 \n", - "5 This is sample 2 2002 \n", - "6 This is sample 3 2003 \n", - "7 This is sample 4 2004 \n", - "\n", - " geographic location (country and/or sea) taxon_id Parameter Value 1 \\\n", - "0 Afghanistan 1234 \n", - "1 Afghanistan 1234 \n", - "2 Albania 2345 \n", - "3 Albania 2345 \n", - "4 Antarctica 254564 A \n", - "5 Antarctica 254564 A \n", - "6 Antarctica 254564 B \n", - "7 Antarctica 254564 A \n", - "\n", - " Parameter Value 2 \n", - "0 \n", - "1 \n", - "2 \n", - "3 \n", - "4 C \n", - "5 C \n", - "6 C \n", - "7 D " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dataframe experiment:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
aliasstudy_aliassample_aliaslibrary_nametitleaccessionsubmission datestatuslibrary_construction_protocoldesign_descriptionlibrary_sourcelibrary_strategylibrary_selectionlibrary_layoutinsert_sizeplatforminstrument_model
0https://datahub.elixir-belgium.org/samples/146https://datahub.elixir-belgium.org/studies/27_28https://datahub.elixir-belgium.org/samples/142Library 1Library title 1My special protocol 1Library description 1GENOMICWGSRANDOMSINGLE123LS454454 GS
1https://datahub.elixir-belgium.org/samples/147https://datahub.elixir-belgium.org/studies/27_28https://datahub.elixir-belgium.org/samples/143Library 2Library title 2My special protocol 2Library description 2GENOMIC SINGLE CELLWGAPCRPAIRED234IlluminaIllumina Genome Analyzer
2https://datahub.elixir-belgium.org/samples/148https://datahub.elixir-belgium.org/studies/27_28https://datahub.elixir-belgium.org/samples/144Library 3Library title 3My special protocol 3Library description 3TRANSCRIPTOMICWXSRANDOM PCRSINGLE345PacBioPacBio RS
3https://datahub.elixir-belgium.org/samples/149https://datahub.elixir-belgium.org/studies/27_28https://datahub.elixir-belgium.org/samples/145Library 4Library title 4My special protocol 4Library description 4TRANSCRIPTOMIC SINGLE CELLRNA-SeqRT-PCRPAIRED456Themo Fisher ScientificAB 3730xL Genetic Analyzer
4https://datahub.elixir-belgium.org/samples/241https://datahub.elixir-belgium.org/studies/50_51https://datahub.elixir-belgium.org/samples/237Library 1Library title 1My library construction protocolGENOMICWGSPCRPAIRED1LS454454 GS
5https://datahub.elixir-belgium.org/samples/242https://datahub.elixir-belgium.org/studies/50_51https://datahub.elixir-belgium.org/samples/238Library 2Library title 2My library construction protocolGENOMICWGSPCRSINGLE1LS454454 GS
6https://datahub.elixir-belgium.org/samples/243https://datahub.elixir-belgium.org/studies/50_51https://datahub.elixir-belgium.org/samples/239Library 3Library title 3My library construction protocolGENOMICWGSPCRSINGLE1LS454454 GS
7https://datahub.elixir-belgium.org/samples/244https://datahub.elixir-belgium.org/studies/50_51https://datahub.elixir-belgium.org/samples/240Library 4Library title 4My library construction protocolMETAGENOMICWGSPCRSINGLE1IlluminaIllumina Genome Analyzer II
8https://datahub.elixir-belgium.org/samples/245https://datahub.elixir-belgium.org/studies/50_51https://datahub.elixir-belgium.org/samples/240Library 5Library title 5My library construction protocolMETAGENOMICWGSPCRSINGLE1IlluminaIllumina Genome Analyzer II
\n", - "
" - ], - "text/plain": [ - " alias \\\n", - "0 https://datahub.elixir-belgium.org/samples/146 \n", - "1 https://datahub.elixir-belgium.org/samples/147 \n", - "2 https://datahub.elixir-belgium.org/samples/148 \n", - "3 https://datahub.elixir-belgium.org/samples/149 \n", - "4 https://datahub.elixir-belgium.org/samples/241 \n", - "5 https://datahub.elixir-belgium.org/samples/242 \n", - "6 https://datahub.elixir-belgium.org/samples/243 \n", - "7 https://datahub.elixir-belgium.org/samples/244 \n", - "8 https://datahub.elixir-belgium.org/samples/245 \n", - "\n", - " study_alias \\\n", - "0 https://datahub.elixir-belgium.org/studies/27_28 \n", - "1 https://datahub.elixir-belgium.org/studies/27_28 \n", - "2 https://datahub.elixir-belgium.org/studies/27_28 \n", - "3 https://datahub.elixir-belgium.org/studies/27_28 \n", - "4 https://datahub.elixir-belgium.org/studies/50_51 \n", - "5 https://datahub.elixir-belgium.org/studies/50_51 \n", - "6 https://datahub.elixir-belgium.org/studies/50_51 \n", - "7 https://datahub.elixir-belgium.org/studies/50_51 \n", - "8 https://datahub.elixir-belgium.org/studies/50_51 \n", - "\n", - " sample_alias library_name \\\n", - "0 https://datahub.elixir-belgium.org/samples/142 Library 1 \n", - "1 https://datahub.elixir-belgium.org/samples/143 Library 2 \n", - "2 https://datahub.elixir-belgium.org/samples/144 Library 3 \n", - "3 https://datahub.elixir-belgium.org/samples/145 Library 4 \n", - "4 https://datahub.elixir-belgium.org/samples/237 Library 1 \n", - "5 https://datahub.elixir-belgium.org/samples/238 Library 2 \n", - "6 https://datahub.elixir-belgium.org/samples/239 Library 3 \n", - "7 https://datahub.elixir-belgium.org/samples/240 Library 4 \n", - "8 https://datahub.elixir-belgium.org/samples/240 Library 5 \n", - "\n", - " title accession submission date status \\\n", - "0 Library title 1 \n", - "1 Library title 2 \n", - "2 Library title 3 \n", - "3 Library title 4 \n", - "4 Library title 1 \n", - "5 Library title 2 \n", - "6 Library title 3 
\n", - "7 Library title 4 \n", - "8 Library title 5 \n", - "\n", - " library_construction_protocol design_description \\\n", - "0 My special protocol 1 Library description 1 \n", - "1 My special protocol 2 Library description 2 \n", - "2 My special protocol 3 Library description 3 \n", - "3 My special protocol 4 Library description 4 \n", - "4 My library construction protocol \n", - "5 My library construction protocol \n", - "6 My library construction protocol \n", - "7 My library construction protocol \n", - "8 My library construction protocol \n", - "\n", - " library_source library_strategy library_selection \\\n", - "0 GENOMIC WGS RANDOM \n", - "1 GENOMIC SINGLE CELL WGA PCR \n", - "2 TRANSCRIPTOMIC WXS RANDOM PCR \n", - "3 TRANSCRIPTOMIC SINGLE CELL RNA-Seq RT-PCR \n", - "4 GENOMIC WGS PCR \n", - "5 GENOMIC WGS PCR \n", - "6 GENOMIC WGS PCR \n", - "7 METAGENOMIC WGS PCR \n", - "8 METAGENOMIC WGS PCR \n", - "\n", - " library_layout insert_size platform \\\n", - "0 SINGLE 123 LS454 \n", - "1 PAIRED 234 Illumina \n", - "2 SINGLE 345 PacBio \n", - "3 PAIRED 456 Themo Fisher Scientific \n", - "4 PAIRED 1 LS454 \n", - "5 SINGLE 1 LS454 \n", - "6 SINGLE 1 LS454 \n", - "7 SINGLE 1 Illumina \n", - "8 SINGLE 1 Illumina \n", - "\n", - " instrument_model \n", - "0 454 GS \n", - "1 Illumina Genome Analyzer \n", - "2 PacBio RS \n", - "3 AB 3730xL Genetic Analyzer \n", - "4 454 GS \n", - "5 454 GS \n", - "6 454 GS \n", - "7 Illumina Genome Analyzer II \n", - "8 Illumina Genome Analyzer II " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dataframe run:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
aliasexperiment_aliasfile_namefile_typefile checksumaccessionsubmission datestatus
0https://datahub.elixir-belgium.org/samples/150https://datahub.elixir-belgium.org/samples/146data_file_1.bambam
1https://datahub.elixir-belgium.org/samples/151https://datahub.elixir-belgium.org/samples/147data_file_2.cramcram
2https://datahub.elixir-belgium.org/samples/152https://datahub.elixir-belgium.org/samples/148data_file_3.fastqfastq
3https://datahub.elixir-belgium.org/samples/153https://datahub.elixir-belgium.org/samples/149data_file_4.sffsff
4https://datahub.elixir-belgium.org/samples/246...https://datahub.elixir-belgium.org/samples/241data_file_1_A.fastqfastq
5https://datahub.elixir-belgium.org/samples/246...https://datahub.elixir-belgium.org/samples/241data_file_1_B.fastqfastq
6https://datahub.elixir-belgium.org/samples/248https://datahub.elixir-belgium.org/samples/242data_file_3.fastqfastq
7https://datahub.elixir-belgium.org/samples/249https://datahub.elixir-belgium.org/samples/243data_file_4.fastqfastq
8https://datahub.elixir-belgium.org/samples/250https://datahub.elixir-belgium.org/samples/244data_file_5.fastqfastq
9https://datahub.elixir-belgium.org/samples/251https://datahub.elixir-belgium.org/samples/245data_file_6.fastqfastq
\n", - "
" - ], - "text/plain": [ - " alias \\\n", - "0 https://datahub.elixir-belgium.org/samples/150 \n", - "1 https://datahub.elixir-belgium.org/samples/151 \n", - "2 https://datahub.elixir-belgium.org/samples/152 \n", - "3 https://datahub.elixir-belgium.org/samples/153 \n", - "4 https://datahub.elixir-belgium.org/samples/246... \n", - "5 https://datahub.elixir-belgium.org/samples/246... \n", - "6 https://datahub.elixir-belgium.org/samples/248 \n", - "7 https://datahub.elixir-belgium.org/samples/249 \n", - "8 https://datahub.elixir-belgium.org/samples/250 \n", - "9 https://datahub.elixir-belgium.org/samples/251 \n", - "\n", - " experiment_alias file_name \\\n", - "0 https://datahub.elixir-belgium.org/samples/146 data_file_1.bam \n", - "1 https://datahub.elixir-belgium.org/samples/147 data_file_2.cram \n", - "2 https://datahub.elixir-belgium.org/samples/148 data_file_3.fastq \n", - "3 https://datahub.elixir-belgium.org/samples/149 data_file_4.sff \n", - "4 https://datahub.elixir-belgium.org/samples/241 data_file_1_A.fastq \n", - "5 https://datahub.elixir-belgium.org/samples/241 data_file_1_B.fastq \n", - "6 https://datahub.elixir-belgium.org/samples/242 data_file_3.fastq \n", - "7 https://datahub.elixir-belgium.org/samples/243 data_file_4.fastq \n", - "8 https://datahub.elixir-belgium.org/samples/244 data_file_5.fastq \n", - "9 https://datahub.elixir-belgium.org/samples/245 data_file_6.fastq \n", - "\n", - " file_type file checksum accession submission date status \n", - "0 bam \n", - "1 cram \n", - "2 fastq \n", - "3 sff \n", - "4 fastq \n", - "5 fastq \n", - "6 fastq \n", - "7 fastq \n", - "8 fastq \n", - "9 fastq " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Done!\n" - ] - } - ], - "source": [ - "\n", - "if (not os.path.exists(outputfolder)) and export_to_excel:\n", - " os.makedirs(outputfolder)\n", - "\n", - "for k, df in submission_dfs.items():\n", - " print(f\"Dataframe {k}:\")\n", 
- " display(df)\n", - " if export_to_excel:\n", - " df.to_excel(f\"{outputfolder}{k}.xlsx\")\n", - "\n", - "print(\"Done!\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Automated pipeline for DataHub" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "config = dotenv_values(\".env\")\n", - "datahub_token = config[\"DATAHUB_API_TOKEN\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import requests\n", - "\n", - "url = \"https://datahub-dev.elixir-belgium.org/single_pages/16/export_isa\"\n", - "url = \"http://localhost:3000/single_pages/2/export_isa\"\n", - "data = {\"key\": \"value\"}\n", - "headers = {\"Content-Type\": \"application/json\"}\n", - "\n", - "response = requests.post(url, json=data, headers=headers)\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From 02a6cb8e988d3a01ff7a3533651a6c666535f0c0 Mon Sep 17 00:00:00 2001 From: bedroesb Date: Thu, 16 Nov 2023 10:18:39 +0100 Subject: [PATCH 53/62] attempt to fix the setup.py --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 9b5e2b2..50d6430 100644 --- a/setup.py +++ b/setup.py @@ -15,10 +15,10 @@ author="Dilmurat Yusuf", author_email="bjoern.gruening@gmail.com", long_description_content_type='text/markdown', - packages=['ena_upload'], + packages=['ena_upload', 'ena_upload.json_parsing'], package_dir={'ena_upload': 'ena_upload'}, package_data={ - 'ena_upload': ['templates/*.xml', 'templates/*.xsd'] + 
'ena_upload': ['templates/*.xml', 'templates/*.xsd', 'json_parsing/json_schemas/*.json'] }, long_description=long_description, url="https://github.com/usegalaxy-eu/ena-upload-cli", From f40f51d3a626001459f29d07df633f79d50578f8 Mon Sep 17 00:00:00 2001 From: bedroesb Date: Thu, 16 Nov 2023 14:40:13 +0100 Subject: [PATCH 54/62] raise error when assay stream is not present --- ena_upload/json_parsing/ena_submission.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ena_upload/json_parsing/ena_submission.py b/ena_upload/json_parsing/ena_submission.py index ceadfb1..b848e8e 100644 --- a/ena_upload/json_parsing/ena_submission.py +++ b/ena_upload/json_parsing/ena_submission.py @@ -69,6 +69,15 @@ def __init__( self.experiments = experiments self.runs = runs + def to_dict(self) -> Dict: + return { + "study": [study.to_dict() for study in self.studies], + "sample": [sample.to_dict() for sample in self.samples], + "experiment": [experiment.to_dict() for experiment in self.experiments], + "run": [run.to_dict() for run in self.runs], + } + + def from_isa_json( isa_json: Dict[str, str], required_assays: List[Dict[str, str]] ) -> None: @@ -87,6 +96,12 @@ def from_isa_json( studies = [] experiments = [] runs = [] + + assay_stream_names = [a_stream['assay_stream'] for a_stream in required_assays] + + if filtered_isa_json["studies"] == []: + raise ValueError(f"No studies found with isa_assay_stream {assay_stream_names}") + for study in filtered_isa_json["studies"]: [samples.append(sample) for sample in EnaSample.from_study_dict(study)] From 7fcc93bd151792e47c6dbf0a824a37eeefe66761 Mon Sep 17 00:00:00 2001 From: bedroesb Date: Thu, 16 Nov 2023 14:42:22 +0100 Subject: [PATCH 55/62] some typos --- ena_upload/ena_upload.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/ena_upload/ena_upload.py b/ena_upload/ena_upload.py index dc07097..c5597c3 100755 --- a/ena_upload/ena_upload.py +++ b/ena_upload/ena_upload.py @@ -374,7 +374,7 @@ def 
get_taxon_id(scientific_name): taxon_id = r.json()[0]['taxId'] return taxon_id except ValueError: - msg = f'Oops, no taxon ID avaible for {scientific_name}. Is it a valid scientific name?' + msg = f'Oops, no taxon ID available for {scientific_name}. Is it a valid scientific name?' sys.exit(msg) @@ -393,7 +393,7 @@ def get_scientific_name(taxon_id): taxon_id = r.json()['scientificName'] return taxon_id except ValueError: - msg = f'Oops, no scientific name avaible for {taxon_id}. Is it a valid taxon_id?' + msg = f'Oops, no scientific name available for {taxon_id}. Is it a valid taxon_id?' sys.exit(msg) @@ -416,9 +416,8 @@ def submit_data(file_paths, password, webin_id): except IOError as ioe: print(ioe) - print("ERROR: could not connect to the ftp server.\ + sys.exit("ERROR: could not connect to the ftp server.\ Please check your login details.") - sys.exit() for filename, path in file_paths.items(): print(f'uploading {path}') try: From bf3b20cb569f99c3d492b7bfed6c16e4f18dd7ad Mon Sep 17 00:00:00 2001 From: bedroesb Date: Thu, 16 Nov 2023 15:20:45 +0100 Subject: [PATCH 56/62] no receipt --- receipt.xml | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 receipt.xml diff --git a/receipt.xml b/receipt.xml deleted file mode 100644 index c298787..0000000 --- a/receipt.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - In study, alias: "https://datahub.elixir-belgium.org/studies/27_28". The object being added already exists in the submission account with accession: "ERP152667". 
- This submission is a TEST submission and will be discarded within 24 hours - - ADD - HOLD - \ No newline at end of file From 81c601e3276fd9dadbf37cf37ad78f1cec433522 Mon Sep 17 00:00:00 2001 From: bedroesb Date: Mon, 20 Nov 2023 09:19:37 +0100 Subject: [PATCH 57/62] new version --- ena_upload/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ena_upload/_version.py b/ena_upload/_version.py index 364e7ba..49e0fc1 100644 --- a/ena_upload/_version.py +++ b/ena_upload/_version.py @@ -1 +1 @@ -__version__ = "0.6.4" +__version__ = "0.7.0" From 3a0f27d6828192b3d715f36cf41631677234aeac Mon Sep 17 00:00:00 2001 From: bedroesb Date: Mon, 20 Nov 2023 13:43:46 +0100 Subject: [PATCH 58/62] update documentation --- README.md | 75 ++++++++++++++++--------------------------------------- 1 file changed, 21 insertions(+), 54 deletions(-) diff --git a/README.md b/README.md index f819669..65f662e 100644 --- a/README.md +++ b/README.md @@ -7,18 +7,18 @@ # ENA upload tool -This command line tool (CLI) allows easy submission of experimental data and respective metadata to the European Nucleotide Archive (ENA) using tabular files or one of the excel spreadsheets that can be found on this [template repo](https://github.com/ELIXIR-Belgium/ENA-metadata-templates). The supported metadata that can be submitted includes study, sample, run and experiment info so you can use the tool for programatic submission of everything ENA needs without the need of logging in to the Webin interface. This also includes client side validation using ENA checklists and releasing the ENA objects. This command line tool is also available as a [Galaxy tool](https://toolshed.g2.bx.psu.edu/view/iuc/ena_upload/) and can be added to you own Galaxy instance or you can make use of one of the existing Galaxy instances, like [usegalaxy.eu](https://usegalaxy.eu/root?tool_id=toolshed.g2.bx.psu.edu/repos/iuc/ena_upload/ena_upload). 
+This command line tool (CLI) allows easy submission of experimental data and respective metadata to the European Nucleotide Archive (ENA) using tabular files or one of the excel spreadsheets that can be found on this [template repo](https://github.com/ELIXIR-Belgium/ENA-metadata-templates). The supported metadata that can be submitted includes study, sample, run and experiment info so you can use the tool for programmatic submission of everything ENA needs without the need of logging in to the Webin interface. This also includes client side validation using ENA checklists and releasing the ENA objects. This command line tool is also available as a [Galaxy tool](https://toolshed.g2.bx.psu.edu/view/iuc/ena_upload/) and can be added to you own Galaxy instance or you can make use of one of the existing Galaxy instances, like [usegalaxy.eu](https://usegalaxy.eu/root?tool_id=toolshed.g2.bx.psu.edu/repos/iuc/ena_upload/ena_upload). ## Overview -The metadata should be provided in separate tables corresponding to the following ENA objects: +The metadata should be provided in separate tables or files carrying similar information corresponding to the following ENA objects: * STUDY * SAMPLE * EXPERIMENT * RUN -The program to perform the following actions: +You can set the tool to perform the following actions: * add: add an object to the archive * modify: modify an object in the archive @@ -29,11 +29,15 @@ After a successful submission, new tsv tables will be generated with the ENA acc ## Tool dependencies -* python 3.5+ including following packages: +* python 3.7+ including following packages: * Genshi * lxml * pandas * requests + * pyyaml + * openpyxl + * jsonschema + ## Installation @@ -60,12 +64,14 @@ All supported arguments: --experiment EXPERIMENT table of EXPERIMENT object --run RUN table of RUN object - --data [FILE [FILE ...]] - data for submission + --data [FILE ...] 
data for submission --center CENTER_NAME specific to your Webin account --checklist CHECKLIST specify the sample checklist with following pattern: ERC0000XX, Default: ERC000011 --xlsx XLSX filled in excel template with metadata + --isa_json ISA_JSON ISA json describing describing the ENA objects + --isa_assay_stream ISA_ASSAY_STREAM + specify the assay stream that holds the ENA information --auto_action BETA: detect automatically which action (add or modify) to apply when the action column is not given --tool TOOL_NAME specify the name of the tool this submission is done with. Default: ena-upload-cli --tool_version TOOL_VERSION @@ -88,7 +94,7 @@ To avoid exposing your credentials through the terminal history, it is recommend ### ENA sample checklists -You can specify ENA sample checklist using the `--checklist` parameter. By default the ENA default sample checklist is used supporting the minimum information required for the sample (ERC000011). The supported checklists are listed on the [ENA website](https://www.ebi.ac.uk/ena/browser/checklists). This website will also describe which Field Names you have to use in the header of your sample tsv table. The Field Names will be automatically mapped in the outputted xml if the correct `--checklist` parameter is given. +You can specify ENA sample checklist using the `--checklist` parameter. By default the ENA default sample checklist is used supporting the minimum information required for the sample (ERC000011). The supported checklists are listed on our [template repo](https://github.com/ELIXIR-Belgium/ENA-metadata-templates). #### Fixed sample columns @@ -104,55 +110,11 @@ The command line tool will automatically fetch the correct scientific name based #### Viral submissions -If you want to submit viral samples you can use the [ENA virus pathogen](https://www.ebi.ac.uk/ena/browser/view/ERC000033) checklist by adding `ERC000033` to the checklist parameter. 
Check out our [viral example command](#test-the-tool) as demonstration. Please use the [ENA virus pathogen](https://www.ebi.ac.uk/ena/browser/view/ERC000033) checklist on the website of ENA to know which values are allowed/possible in the `restricted text` and `text choice` fields. +If you want to submit viral samples you can use the [ENA virus pathogen](https://www.ebi.ac.uk/ena/browser/view/ERC000033) checklist by adding `ERC000033` to the checklist parameter. Check out our [viral example command](#test-the-tool) as demonstration. Please use the [ENA virus pathogen](https://github.com/ELIXIR-Belgium/ENA-metadata-templates/tree/main/templates/ERC000033) checklist in our template repo to know what is allowed/possible in the `Controlled vocabulary`fields. ### ENA study, experiment and run tables -Here we list all the possible columns one can have in its study, experiment or run table along with its cardinality and controlled vocabulary (CV). -Currently we refer to the [ENA Webin](https://wwwdev.ebi.ac.uk/ena/submit/webin/) to discover which values are allowed when a controlled vocabulary is used, but this will change in the future. - -#### Study tsv table - -| Name of column | Cardinality | Documentation | CV | -|---|---|---|---| -| alias | mandatory | Submitter designated name for the object. The name must be unique within the submission account. | | -| title | mandatory | Title of the study as would be used in a publication. | | -| study_type | mandatory | The STUDY_TYPE presents a controlled vocabulary for expressing the overall purpose of the study. | yes | -| study_abstract | mandatory | Briefly describes the goals, purpose, and scope of the Study. This need not be listed if it can be inherited from a referenced publication. | | -| center_project_name | optional | Submitter defined project name. This field is intended for backward tracking of the study record to the submitter's LIMS. 
| | -| study_description | optional | More extensive free-form description of the study. | | -| pubmed_id | optional | Link to publication related to this study. | | - -#### Experiment tsv table - -| Name of column | Cardinality | Documentation | CV | -|---|---|---|---| -| alias | mandatory | Submitter designated name for the object. The name must be unique within the submission account. | | -| title | mandatory | Short text that can be used to call out experiment records in searches or in displays. | | -| study_alias | mandatory | Identifies the parent study. | | -| sample_alias | mandatory | Pick a sample to associate this experiment with. The sample may be an individual or a pool, depending on how it is specified. | | -| design_description | mandatory | Goal and setup of the individual library including library was constructed. | | -| spot_descriptor | optional | The SPOT_DESCRIPTOR specifies how to decode the individual reads of interest from the monolithic spot sequence. The spot descriptor contains aspects of the experimental design, platform, and processing information. There will be two methods of specification: one will be an index into a table of typical decodings, the other being an exact specification. This construct is needed for loading data and for interpreting the loaded runs. It can be omitted if the loader can infer read layout (from multiple input files or from one input files). | | -| library_name | optional | The submitter's name for this library. | | -| library_layout | mandatory | LIBRARY_LAYOUT specifies whether to expect single, paired, or other configuration of reads. In the case of paired reads, information about the relative distance and orientation is specified. | yes | -| insert_size | mandatory | Relative distance. | | -| library_strategy | mandatory | Sequencing technique intended for this library | yes | -| library_source | mandatory | The LIBRARY_SOURCE specifies the type of source material that is being sequenced. 
| yes | -| library_selection | mandatory | Method used to enrich the target in the sequence library preparation | yes | -| platform | mandatory | The PLATFORM record selects which sequencing platform and platform-specific runtime parameters. This will be determined by the Center. | yes | -| instrument_model | mandatory | Model of the sequencing instrument. | yes | -| library_construction_protocol | optional | Free form text describing the protocol by which the sequencing library was constructed. | | - - -#### Run tsv table - -| Name of column | Cardinality | Documentation | CV | -|---|---|---|---| -| alias | mandatory | Submitter designated name for the object. The name must be unique within the submission account. | | -| experiment_alias | mandatory | Identifies the parent experiment. | | -| file_name | mandatory | The name or relative pathname of a run data file. | | -| file_type | mandatory | The run data file model. | yes | -| file_checksum | optional | Checksum of uncompressed file. If not given, the checksum will be calculated based on the data files specified in the --data option | | +Please check out the [template](https://github.com/ELIXIR-Belgium/ENA-metadata-templates) of your checklist to discover which attributes are mandatory for the study, experiment and run ENA object. ### Dev instance @@ -176,7 +138,7 @@ There are two ways of submitting only a selection of objects to ENA. This is han | sample_alias_5 | | sample_title_2 | 2697049 | sample_description_2 | -> IMPORTANT: if the status column is given but not filled in, or filled in with a different action from the one in the `--action` parameter, not rows will be submitted! Either leave out the column or add to every row the corect action. +> IMPORTANT: if the status column is given but not filled in, or filled in with a different action from the one in the `--action` parameter, no rows will be submitted! Either leave out the column or add to every row you want to submit the correct action. 
### Using Excel templates @@ -262,6 +224,11 @@ By default the updated tables after submission will have the action `added` in t ena-upload-cli --action add --center 'your_center_name' --data example_data/*gz --dev --checklist ERC000033 --secret .secret.yml --xlsx example_tables/ENA_excel_example_ERC000033.xlsx ``` +* **Using an ISA JSON** + ``` + ena-upload-cli --action add --center 'your_center_name' --data example_data/*gz --dev --checklist ERC000033 --secret .secret.yml --isa_json tests/test_data/simple_test_case_v2.json --isa_assay_stream "Ena stream 1" + ``` + * **Release submission** ``` ena-upload-cli --action release --center'your_center_name' --study example_tables/ENA_template_studies_release.tsv --dev --secret .secret.yml From 574b85781a311945d4b5fa0d48a74a989829b1e2 Mon Sep 17 00:00:00 2001 From: bedroesb Date: Fri, 15 Dec 2023 16:53:09 +0100 Subject: [PATCH 59/62] doc --- README.md | 4 ++-- ena_upload/ena_upload.py | 13 ++++++++----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 65f662e..530f489 100644 --- a/README.md +++ b/README.md @@ -69,9 +69,9 @@ All supported arguments: --checklist CHECKLIST specify the sample checklist with following pattern: ERC0000XX, Default: ERC000011 --xlsx XLSX filled in excel template with metadata - --isa_json ISA_JSON ISA json describing describing the ENA objects + --isa_json ISA_JSON BETA: ISA json describing describing the ENA objects --isa_assay_stream ISA_ASSAY_STREAM - specify the assay stream that holds the ENA information + BETA: specify the assay stream(s) that holds the ENA information, this can be a list of assay streams --auto_action BETA: detect automatically which action (add or modify) to apply when the action column is not given --tool TOOL_NAME specify the name of the tool this submission is done with. 
Default: ena-upload-cli --tool_version TOOL_VERSION diff --git a/ena_upload/ena_upload.py b/ena_upload/ena_upload.py index c5597c3..0a13ec1 100755 --- a/ena_upload/ena_upload.py +++ b/ena_upload/ena_upload.py @@ -424,7 +424,7 @@ def submit_data(file_paths, password, webin_id): print(ftps.storbinary(f'STOR {filename}', open(path, 'rb'))) except BaseException as err: print(f"ERROR: {err}") - print("ERROR: If your connection times out at this stage, it propably is because of a firewall that is in place. FTP is used in passive mode and connection will be opened to one of the ports: 40000 and 50000.") + print("ERROR: If your connection times out at this stage, it probably is because of a firewall that is in place. FTP is used in passive mode and connection will be opened to one of the ports: 40000 and 50000.") raise print(ftps.quit()) @@ -701,7 +701,7 @@ def process_args(): parser.add_argument('--data', nargs='*', - help='data for submission', + help='data for submission, this can be a list of files', metavar='FILE') parser.add_argument('--center', @@ -716,10 +716,11 @@ def process_args(): help='filled in excel template with metadata') parser.add_argument('--isa_json', - help='ISA json describing describing the ENA objects') + help='BETA: ISA json describing describing the ENA objects') parser.add_argument('--isa_assay_stream', - help='specify the assay stream that holds the ENA information') + nargs='*', + help='BETA: specify the assay stream(s) that holds the ENA information, this can be a list of assay streams') parser.add_argument('--auto_action', action="store_true", @@ -881,7 +882,9 @@ def main(): schema_tables = {} schema_dataframe = {} - required_assays = [{"assay_stream": isa_assay_stream}] + required_assays = [] + for stream in isa_assay_stream: + required_assays.append({"assay_stream": stream}) submission = EnaSubmission.from_isa_json(isa_json, required_assays) submission_dataframes = submission.generate_dataframes() for schema, df in 
submission_dataframes.items(): From 2a4d9a808372e4a39dd9a94422809eb31818df5c Mon Sep 17 00:00:00 2001 From: bedroesb Date: Fri, 15 Dec 2023 17:00:45 +0100 Subject: [PATCH 60/62] add isa_json --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 530f489..7658f31 100644 --- a/README.md +++ b/README.md @@ -177,7 +177,7 @@ By default the updated tables after submission will have the action `added` in t ## Tool overview **inputs**: -* metadata tables/excelsheet +* metadata tables/excelsheet/isa_json * examples in `example_table` and on this [template repo](https://github.com/ELIXIR-Belgium/ENA-metadata-templates) for excel sheets * (optional) define actions in **status** column e.g. `add`, `modify`, `cancel`, `release` (when not given the whole table is submitted) * to perform bulk submission of all objects, the `aliases ids` in different ENA objects should be in the association where alias ids in experiment object link all objects together From c6503de6639bed92d946bc2fdb5dce60cb75ff3a Mon Sep 17 00:00:00 2001 From: bedroesb Date: Mon, 18 Dec 2023 09:58:13 +0100 Subject: [PATCH 61/62] use with statement --- ena_upload/ena_upload.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ena_upload/ena_upload.py b/ena_upload/ena_upload.py index 0a13ec1..be66bed 100755 --- a/ena_upload/ena_upload.py +++ b/ena_upload/ena_upload.py @@ -878,7 +878,8 @@ def main(): schema_tables[schema] = f"{path}/ENA_template_{schema}.tsv" elif isa_json_file: # Read json file - isa_json = json.load(open(isa_json_file)) + with open(isa_json_file, 'r') as json_file: + isa_json = json.load(json_file) schema_tables = {} schema_dataframe = {} From abf7b73d8138959e1bb32e98d2707f367cb679cd Mon Sep 17 00:00:00 2001 From: bedroesb Date: Mon, 18 Dec 2023 10:00:58 +0100 Subject: [PATCH 62/62] fix example --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7658f31..0f8c86a 100644 --- 
a/README.md +++ b/README.md @@ -226,7 +226,7 @@ By default the updated tables after submission will have the action `added` in t * **Using an ISA JSON** ``` - ena-upload-cli --action add --center 'your_center_name' --data example_data/*gz --dev --checklist ERC000033 --secret .secret.yml --isa_json tests/test_data/simple_test_case_v2.json --isa_assay_stream "Ena stream 1" + ena-upload-cli --action add --center 'your_center_name' --data example_data/*gz --dev --secret .secret.yml --isa_json tests/test_data/simple_test_case_v2.json --isa_assay_stream "Ena stream 1" ``` * **Release submission**