diff --git a/.github/workflows/ci-schema-convert.yml b/.github/workflows/ci-schema-convert.yml index 2a90e6d1..213bd1cb 100644 --- a/.github/workflows/ci-schema-convert.yml +++ b/.github/workflows/ci-schema-convert.yml @@ -11,10 +11,10 @@ on: - '.github/workflows/ci-schema-convert.yml' - 'utils/**' - 'model_contexts/**' + workflow_dispatch: env: - SCHEMATIC_VERSION: 24.11.2 - SCHEMATIC_SERVICE_ACCOUNT_CREDS: ${{ secrets.SCHEMATIC_SERVICE_ACCOUNT_CREDS }} + CLIENT_VERSION: 4.11.0 jobs: build: @@ -28,28 +28,20 @@ jobs: # Set up supported python. - uses: actions/setup-python@v5 with: - python-version: '3.10.12' + python-version: '3.10.19' - name: Install Python Packages run: | - pip install schematicpy==${{ env.SCHEMATIC_VERSION }} - pip show schematicpy - - - name: Create creds file - run: | - echo "${SCHEMATIC_SERVICE_ACCOUNT_CREDS}" > schematic_service_account_creds.json + pip install "synapseclient[pandas, curator]==${{ env.CLIENT_VERSION }}" + pip show synapseclient - name: Create context-specific models run: | python utils/context_specific_models.py - - name: convert model csv to jsonld - run: | - bash utils/schema_convert.sh - - - name: Generate templates + - name: Generate json schemas run: | - bash utils/generate_model_templates.sh + python utils/generate_jsonschema.py - name: Generate blank CSV templates from json schema to support dictionary site build run: | @@ -59,10 +51,6 @@ jobs: run: | rm model_json_schema/ark.BDM* - - name: Clean up sensitive info - run: | - rm schematic_service_account_creds.json - - name: Commit files if there are changes run: | git status diff --git a/.github/workflows/create-template-config.yml b/.github/workflows/create-template-config.yml deleted file mode 100644 index c9f1683b..00000000 --- a/.github/workflows/create-template-config.yml +++ /dev/null @@ -1,80 +0,0 @@ -# -------------------------------------------------------------------------------------------------- -# GitHub Action to create a DCA template config json file 
for a data model -# -# This action creates a json file named with the `file` input argument using -# the data model supplied to the `data_model` argument. It will validate the -# json file against DCA's template config schema. Finally, it will create a PR -# in the repo for the new file. -# -# Copy this into your data model repo .github/workflow directory. -# Your repo settings must have Actions enabled and must allow GitHub Actions to -# create and approve pull requests. -# -# By default, this action runs by workflow dispatch. But it can be configured -# to run on other triggers. Consult the github doc below for more information. -# https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#workflow_dispatch -# -# The resulting file with contain one entry for each attribute in the data model that `dependsOn` -# "Component". To include a subset of these attributes, use either `include_data_types` or -# `exclude_data_types` in the call to `datacurator::write_dca_template_config()` -# -# -------------------------------------------------------------------------------------------------- - -name: DCA Template Config File -on: - workflow_dispatch: - inputs: - data_model: - description: URL to a jsonld data model file - required: true - file: - description: Directory to save the template config - required: true - include_data_types: - description: Space-separated string of data types to include in output. Must be empty if using exclude_data_types. - required: false - exclude_data_types: - description: Space-separated string of data types to exclude from output. Must be empty if using include_data_types. - required: false - data_model_labels: - description: How schematic gets data model labels. Defaults `class_label`. 
- default: 'class_label' - required: true - -jobs: - create-template-config: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Checkout DCA Config Repo for Schema - uses: actions/checkout@v4 - with: - repository: 'Sage-Bionetworks/data_curator_config' - ref: 'dev' - path: 'data_curator_config' - - - name: Create config file - uses: sage-bionetworks/dca-template-config-action@main - env: - data_model: ${{ inputs.data_model }} - file: ${{ inputs.file }} - include_data_types: ${{ inputs.include_data_types }} - exclude_data_types: ${{ inputs.exclude_data_types }} - data_model_labels: ${{ inputs.data_model_labels }} - - - name: Validate Config File - uses: docker://orrosenblatt/validate-json-action:latest - env: - INPUT_SCHEMA: 'data_curator_config/schemas/dca_template_config.schema.json' - INPUT_JSONS: ${{ inputs.file }} - - - name: Open PR - uses: peter-evans/create-pull-request@v5 - with: - title: Update DCA Template Config File - body: Recreate the json file that populates the DCA template dropdown menu. 
- delete-branch: true - branch-suffix: timestamp - add-paths: | - ${{ inputs.file }} diff --git a/model_json_schema/ark.BiospecimenMetadataTemplate.schema.json b/model_json_schema/ark.BiospecimenMetadataTemplate.schema.json index 5b65cadd..a76e58de 100644 --- a/model_json_schema/ark.BiospecimenMetadataTemplate.schema.json +++ b/model_json_schema/ark.BiospecimenMetadataTemplate.schema.json @@ -1,1152 +1,1080 @@ { + "$id": "http://example.com/BiospecimenMetadataTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", - "properties": { - "notes": {}, - "program": { - "enum": [ - "AMP AIM", - "Community Contribution", - "AMP RA/SLE" - ] - }, - "project": { - "type": "array", - "items": { - "enum": [ - "UMass V-CoRT", - "RA", - "LOCKIT", - "AIM for RA", - "ELLIPSS", - "STAMP", - "SLE" - ] - }, - "maxItems": 7 - }, - "biospecimenType": { - "enum": [ - "salivary gland", - "fibroblast-like synoviocyte", - "suction blister cells", - "PBMCs", - "none", - "saliva", - "skin swab", - "skin biopsy", - "primary cell culture", - "urine", - "synovial tissue", - "whole blood", - "kidney biopsy", - "cell line", - "synovial fluid", - "plasma", - "uvea", - "suction blister fluid", - "serum", - "stool", - "total leukocytes" - ] - }, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "biospecimenID": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "individualID": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "altSampleID": {}, - "parentBiospecimenID": {}, - "sampleCollectionBatch": {}, - "biospecimenSubtype": { - "enum": [ - "nuclei suspension", - "cell or tissue lysate", - "FFPE tissue", - "fresh tissue", - "flow-sorted cells", - "frozen tissue", - "cell suspension", - "PFA-fixed tissue", - "supernatant", - "" - ] - }, - "visitID": {}, - "skinSiteStatus": { - "enum": [ - "healthy control", - "lesional proximal", - "lesional", - "non-lesional", - "" - ] - }, - 
"anatomicalSite": { - "enum": [ - "right wrist joint", - "right 1st MTP joint", - "right 2nd MTP joint", - "right 2nd MCP joint", - "left hip joint", - "right 3rd MCP joint", - "right hip joint", - "left ankle joint", - "left wrist joint", - "right ankle joint", - "right knee joint", - "left 2nd MCP joint", - "unknown", - "other site", - "left knee joint", - "" - ] - }, - "salivaCollectionProcedure": { - "enum": [ - "unstimulated", - "stimulated", - "" - ] - }, - "primaryCellSource": { - "enum": [ - "salivary gland", - "pannus-derived epidermis", - "uvea", - "PBMCs", - "synovial tissue", - "kidney", - "whole blood", - "pannus-derived dermis", - "urine", - "total leukocytes", - "" - ] - }, - "cellType": {}, - "cellOntologyID": {}, - "krennLining": {}, - "krennInflammatory": {}, - "synovialCollectionProcedure": { - "enum": [ - "unknown", - "biopsy", - "synovectomy", - "arthroplasty", - "" - ] - }, - "krennStroma": {}, - "krennSynovitisScore": {}, - "userDefinedCellType": {}, - "FACSPopulation": {} - }, - "required": [ - "program", - "project", - "biospecimenType", - "Component", - "biospecimenID", - "individualID" - ], "allOf": [ { "if": { "properties": { - "program": { + "BiospecimenSubtype": { "enum": [ - "AMP AIM" + "Cellsuspension" ] } - }, - "required": [ - "program" - ] + } }, "then": { "properties": { - "visitID": { + "CellOntologyID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "visitID" + "CellOntologyID" ] } }, { "if": { "properties": { - "program": { + "BiospecimenSubtype": { "enum": [ - "AMP RA/SLE" + "Flow-sortedcells" ] } - }, - "required": [ - "program" - ] + } }, "then": { "properties": { - "visitID": { + "CellOntologyID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "visitID" + "CellOntologyID" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "suction blister cells" + "Cellline" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { 
"properties": { - "skinSiteStatus": { - "enum": [ - "healthy control", - "lesional proximal", - "lesional", - "non-lesional" - ] + "CellOntologyID": { + "not": { + "type": "null" + } } }, "required": [ - "skinSiteStatus" + "CellOntologyID" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "skin swab" + "Primarycellculture" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "skinSiteStatus": { - "enum": [ - "healthy control", - "lesional proximal", - "lesional", - "non-lesional" - ] + "CellOntologyID": { + "not": { + "type": "null" + } } }, "required": [ - "skinSiteStatus" + "CellOntologyID" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenSubtype": { "enum": [ - "skin biopsy" + "Cellsuspension" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "skinSiteStatus": { - "enum": [ - "healthy control", - "lesional proximal", - "lesional", - "non-lesional" - ] + "CellType": { + "not": { + "type": "null" + } } }, "required": [ - "skinSiteStatus" + "CellType" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenSubtype": { "enum": [ - "suction blister fluid" + "Flow-sortedcells" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "skinSiteStatus": { - "enum": [ - "healthy control", - "lesional proximal", - "lesional", - "non-lesional" - ] + "CellType": { + "not": { + "type": "null" + } } }, "required": [ - "skinSiteStatus" + "CellType" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "suction blister cells" + "Cellline" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "anatomicalSite": { - "enum": [ - "right wrist joint", - "right 1st MTP joint", - "right 2nd MTP joint", - "right 2nd MCP joint", - "left hip joint", - "right 3rd MCP joint", - "right hip joint", - "left ankle joint", - "left wrist joint", - "right ankle 
joint", - "right knee joint", - "left 2nd MCP joint", - "unknown", - "other site", - "left knee joint" - ] + "CellType": { + "not": { + "type": "null" + } } }, "required": [ - "anatomicalSite" + "CellType" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "skin swab" + "Primarycellculture" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "anatomicalSite": { - "enum": [ - "right wrist joint", - "right 1st MTP joint", - "right 2nd MTP joint", - "right 2nd MCP joint", - "left hip joint", - "right 3rd MCP joint", - "right hip joint", - "left ankle joint", - "left wrist joint", - "right ankle joint", - "right knee joint", - "left 2nd MCP joint", - "unknown", - "other site", - "left knee joint" - ] + "CellType": { + "not": { + "type": "null" + } } }, "required": [ - "anatomicalSite" + "CellType" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenSubtype": { "enum": [ - "skin biopsy" + "Cellsuspension" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "anatomicalSite": { - "enum": [ - "right wrist joint", - "right 1st MTP joint", - "right 2nd MTP joint", - "right 2nd MCP joint", - "left hip joint", - "right 3rd MCP joint", - "right hip joint", - "left ankle joint", - "left wrist joint", - "right ankle joint", - "right knee joint", - "left 2nd MCP joint", - "unknown", - "other site", - "left knee joint" - ] + "UserDefinedCellType": { + "not": { + "type": "null" + } } }, "required": [ - "anatomicalSite" + "UserDefinedCellType" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenSubtype": { "enum": [ - "synovial tissue" + "Flow-sortedcells" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "anatomicalSite": { - "enum": [ - "right wrist joint", - "right 1st MTP joint", - "right 2nd MTP joint", - "right 2nd MCP joint", - "left hip joint", - "right 3rd MCP joint", - "right hip joint", - "left ankle 
joint", - "left wrist joint", - "right ankle joint", - "right knee joint", - "left 2nd MCP joint", - "unknown", - "other site", - "left knee joint" - ] + "UserDefinedCellType": { + "not": { + "type": "null" + } } }, "required": [ - "anatomicalSite" + "UserDefinedCellType" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenSubtype": { "enum": [ - "synovial fluid" + "Flow-sortedcells" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "anatomicalSite": { - "enum": [ - "right wrist joint", - "right 1st MTP joint", - "right 2nd MTP joint", - "right 2nd MCP joint", - "left hip joint", - "right 3rd MCP joint", - "right hip joint", - "left ankle joint", - "left wrist joint", - "right ankle joint", - "right knee joint", - "left 2nd MCP joint", - "unknown", - "other site", - "left knee joint" - ] + "FACSPopulation": { + "not": { + "type": "null" + } } }, "required": [ - "anatomicalSite" + "FACSPopulation" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "suction blister fluid" + "Primarycellculture" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "anatomicalSite": { - "enum": [ - "right wrist joint", - "right 1st MTP joint", - "right 2nd MTP joint", - "right 2nd MCP joint", - "left hip joint", - "right 3rd MCP joint", - "right hip joint", - "left ankle joint", - "left wrist joint", - "right ankle joint", - "right knee joint", - "left 2nd MCP joint", - "unknown", - "other site", - "left knee joint" - ] + "PrimaryCellSource": { + "not": { + "type": "null" + } } }, "required": [ - "anatomicalSite" + "PrimaryCellSource" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "saliva" + "Saliva" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "salivaCollectionProcedure": { - "enum": [ - "unstimulated", - "stimulated" - ] + "SalivaCollectionProcedure": { + "not": { + "type": "null" 
+ } } }, "required": [ - "salivaCollectionProcedure" + "SalivaCollectionProcedure" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "primary cell culture" + "Skinbiopsy" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "primaryCellSource": { - "enum": [ - "salivary gland", - "pannus-derived epidermis", - "uvea", - "PBMCs", - "synovial tissue", - "kidney", - "whole blood", - "pannus-derived dermis", - "urine", - "total leukocytes" - ] + "AnatomicalSite": { + "not": { + "type": "null" + } } }, "required": [ - "primaryCellSource" + "AnatomicalSite" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "primary cell culture" + "Skinswab" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "cellType": {} + "AnatomicalSite": { + "not": { + "type": "null" + } + } }, "required": [ - "cellType" + "AnatomicalSite" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "cell line" + "Suctionblistercells" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "cellType": {} + "AnatomicalSite": { + "not": { + "type": "null" + } + } }, "required": [ - "cellType" + "AnatomicalSite" ] } }, { "if": { "properties": { - "biospecimenSubtype": { + "BiospecimenType": { "enum": [ - "flow-sorted cells" + "Suctionblisterfluid" ] } - }, - "required": [ - "biospecimenSubtype" - ] + } }, "then": { "properties": { - "cellType": {} + "AnatomicalSite": { + "not": { + "type": "null" + } + } }, "required": [ - "cellType" + "AnatomicalSite" ] } }, { "if": { "properties": { - "biospecimenSubtype": { + "BiospecimenType": { "enum": [ - "cell suspension" + "Synovialfluid" ] } - }, - "required": [ - "biospecimenSubtype" - ] + } }, "then": { "properties": { - "cellType": {} + "AnatomicalSite": { + "not": { + "type": "null" + } + } }, "required": [ - "cellType" + "AnatomicalSite" ] } }, { "if": { 
"properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "primary cell culture" + "Synovialtissue" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "cellOntologyID": {} + "AnatomicalSite": { + "not": { + "type": "null" + } + } }, "required": [ - "cellOntologyID" + "AnatomicalSite" ] } }, { "if": { "properties": { - "biospecimenType": { + "PrimaryCellSource": { "enum": [ - "cell line" + "Synovialtissue" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "cellOntologyID": {} + "AnatomicalSite": { + "not": { + "type": "null" + } + } }, "required": [ - "cellOntologyID" + "AnatomicalSite" ] } }, { "if": { "properties": { - "biospecimenSubtype": { + "BiospecimenType": { "enum": [ - "flow-sorted cells" + "Skinbiopsy" ] } - }, - "required": [ - "biospecimenSubtype" - ] + } }, "then": { "properties": { - "cellOntologyID": {} + "SkinSiteStatus": { + "not": { + "type": "null" + } + } }, "required": [ - "cellOntologyID" + "SkinSiteStatus" ] } }, { "if": { "properties": { - "biospecimenSubtype": { + "BiospecimenType": { "enum": [ - "cell suspension" + "Skinswab" ] } - }, - "required": [ - "biospecimenSubtype" - ] + } }, "then": { "properties": { - "cellOntologyID": {} + "SkinSiteStatus": { + "not": { + "type": "null" + } + } }, "required": [ - "cellOntologyID" + "SkinSiteStatus" ] } }, { "if": { "properties": { - "primaryCellSource": { + "BiospecimenType": { "enum": [ - "synovial tissue" + "Suctionblistercells" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "krennLining": {} + "SkinSiteStatus": { + "not": { + "type": "null" + } + } }, "required": [ - "krennLining" + "SkinSiteStatus" ] } }, { "if": { "properties": { - "primaryCellSource": { + "BiospecimenType": { "enum": [ - "synovial tissue" + "Suctionblisterfluid" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "krennLining": {} + "SkinSiteStatus": { + "not": { + 
"type": "null" + } + } }, "required": [ - "krennLining" + "SkinSiteStatus" ] } }, { "if": { "properties": { - "primaryCellSource": { + "BiospecimenType": { "enum": [ - "synovial tissue" + "Synovialfluid" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "krennInflammatory": {} + "SynovialCollectionProcedure": { + "not": { + "type": "null" + } + } }, "required": [ - "krennInflammatory" + "SynovialCollectionProcedure" ] } }, { "if": { "properties": { - "primaryCellSource": { + "BiospecimenType": { "enum": [ - "synovial tissue" + "Synovialtissue" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "krennInflammatory": {} + "SynovialCollectionProcedure": { + "not": { + "type": "null" + } + } }, "required": [ - "krennInflammatory" + "SynovialCollectionProcedure" ] } }, { "if": { "properties": { - "primaryCellSource": { + "PrimaryCellSource": { "enum": [ - "synovial tissue" + "Synovialtissue" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "synovialCollectionProcedure": { - "enum": [ - "unknown", - "biopsy", - "synovectomy", - "arthroplasty" - ] + "SynovialCollectionProcedure": { + "not": { + "type": "null" + } } }, "required": [ - "synovialCollectionProcedure" + "SynovialCollectionProcedure" ] } }, { "if": { "properties": { - "primaryCellSource": { + "BiospecimenType": { "enum": [ - "synovial tissue" + "Synovialtissue" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "synovialCollectionProcedure": { - "enum": [ - "unknown", - "biopsy", - "synovectomy", - "arthroplasty" - ] + "KrennInflammatory": { + "not": { + "type": "null" + } } }, "required": [ - "synovialCollectionProcedure" + "KrennInflammatory" ] } }, { "if": { "properties": { - "biospecimenType": { + "PrimaryCellSource": { "enum": [ - "synovial fluid" + "Synovialtissue" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - 
"synovialCollectionProcedure": { - "enum": [ - "unknown", - "biopsy", - "synovectomy", - "arthroplasty" - ] + "KrennInflammatory": { + "not": { + "type": "null" + } } }, "required": [ - "synovialCollectionProcedure" + "KrennInflammatory" ] } }, { "if": { "properties": { - "primaryCellSource": { + "BiospecimenType": { "enum": [ - "synovial tissue" + "Synovialtissue" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "krennStroma": {} + "KrennLining": { + "not": { + "type": "null" + } + } }, "required": [ - "krennStroma" + "KrennLining" ] } }, { "if": { "properties": { - "primaryCellSource": { + "PrimaryCellSource": { "enum": [ - "synovial tissue" + "Synovialtissue" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "krennStroma": {} + "KrennLining": { + "not": { + "type": "null" + } + } }, "required": [ - "krennStroma" + "KrennLining" ] } }, { "if": { "properties": { - "primaryCellSource": { + "BiospecimenType": { "enum": [ - "synovial tissue" + "Synovialtissue" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "krennSynovitisScore": {} + "KrennStroma": { + "not": { + "type": "null" + } + } }, "required": [ - "krennSynovitisScore" + "KrennStroma" ] } }, { "if": { "properties": { - "primaryCellSource": { + "PrimaryCellSource": { "enum": [ - "synovial tissue" + "Synovialtissue" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "krennSynovitisScore": {} + "KrennStroma": { + "not": { + "type": "null" + } + } }, "required": [ - "krennSynovitisScore" + "KrennStroma" ] } }, { "if": { "properties": { - "biospecimenSubtype": { + "BiospecimenType": { "enum": [ - "flow-sorted cells" + "Synovialtissue" ] } - }, - "required": [ - "biospecimenSubtype" - ] + } }, "then": { "properties": { - "userDefinedCellType": {} + "KrennSynovitisScore": { + "not": { + "type": "null" + } + } }, "required": [ - "userDefinedCellType" + "KrennSynovitisScore" 
] } }, { "if": { "properties": { - "biospecimenSubtype": { + "PrimaryCellSource": { "enum": [ - "cell suspension" + "Synovialtissue" ] } - }, - "required": [ - "biospecimenSubtype" - ] + } }, "then": { "properties": { - "userDefinedCellType": {} + "KrennSynovitisScore": { + "not": { + "type": "null" + } + } }, "required": [ - "userDefinedCellType" + "KrennSynovitisScore" ] } }, { "if": { "properties": { - "biospecimenSubtype": { + "Program": { "enum": [ - "flow-sorted cells" + "AMPAIM" ] } + } + }, + "then": { + "properties": { + "VisitID": { + "not": { + "type": "null" + } + } }, "required": [ - "biospecimenSubtype" + "VisitID" ] + } + }, + { + "if": { + "properties": { + "Program": { + "enum": [ + "AMPRA/SLE" + ] + } + } }, "then": { "properties": { - "FACSPopulation": { + "VisitID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "FACSPopulation" + "VisitID" ] } } - ] + ], + "description": "A general template outlining metadata to be collected for biospecimen profiled in a dataset.", + "properties": { + "AltSampleID": { + "description": "An alternate identifier for a sample. With some assays there can be default or alternate sample identifiers entered into the data collection software. If you will be uploading data collected with an alternate identifier please provide that here. In some cases, the ARK BDM team will use this field to capture original but out-dated identifiers for samples.", + "title": "altSampleID", + "type": "string" + }, + "AnatomicalSite": { + "description": "The anatomical site, i.e., location on or within the body, from which the biospecimen was collected. 
This attribute is required depending on the value selected for biospecimenType.", + "enum": [ + "left 2nd MCP joint", + "left ankle joint", + "left hip joint", + "left knee joint", + "left wrist joint", + "other site", + "right 1st MTP joint", + "right 2nd MCP joint", + "right 2nd MTP joint", + "right 3rd MCP joint", + "right ankle joint", + "right hip joint", + "right knee joint", + "right wrist joint", + "unknown" + ], + "title": "anatomicalSite" + }, + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.", + "title": "biospecimenID", + "type": "string" + }, + "BiospecimenSubtype": { + "description": "Biospecimen status before sample is processed into a scRNA-seq library. Several scRNA-seq technologies support a variety of sample processing methods which can introduces sources of technical variation.", + "enum": [ + "FFPE tissue", + "PFA-fixed tissue", + "cell or tissue lysate", + "cell suspension", + "flow-sorted cells", + "fresh tissue", + "frozen tissue", + "nuclei suspension", + "supernatant" + ], + "title": "biospecimenSubtype" + }, + "BiospecimenType": { + "description": "A label indicating the biological material collected for experimentation and data collection. 
Where applicable, provide all types in a comma-separated list.", + "enum": [ + "PBMCs", + "cell line", + "fibroblast-like synoviocyte", + "kidney biopsy", + "none", + "plasma", + "primary cell culture", + "saliva", + "salivary gland", + "serum", + "skin biopsy", + "skin swab", + "stool", + "suction blister cells", + "suction blister fluid", + "synovial fluid", + "synovial tissue", + "total leukocytes", + "urine", + "uvea", + "whole blood" + ], + "title": "biospecimenType" + }, + "CellOntologyID": { + "description": "Cell Ontology CL identifier that best describes a biopsecimen used to generate data, e.g., CL:0000084 for T cell.", + "pattern": "^CL:", + "title": "cellOntologyID", + "type": "string" + }, + "CellType": { + "description": "The cell type name from Cell Ontology for the corresponding CL identifier.", + "title": "cellType", + "type": "string" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "FACSPopulation": { + "description": "A description of the marker gating strategy used to derive the population cells with FACS.", + "title": "FACSPopulation", + "type": "string" + }, + "IndividualID": { + "description": "Unique identifier assigned to each study participant. For multi-specimen data files provide all IDs in a comma-separated list.", + "title": "individualID", + "type": "string" + }, + "KrennInflammatory": { + "description": "A standardized, semi-quantitative measure of the degree of inflammatory infiltrate in synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. 
If value unknown, enter '-1'.", + "title": "krennInflammatory", + "type": "number" + }, + "KrennLining": { + "description": "A standardized, semi-quantitative measure of the degree of hyperplasia/enlargement of the synovial lining layer of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.", + "title": "krennLining", + "type": "number" + }, + "KrennStroma": { + "description": "A standardized, semi-quantitative measure of stromal cell density of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.", + "title": "krennStroma", + "type": "number" + }, + "KrennSynovitisScore": { + "description": "The Krenn Synovitis Score (KSS) is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.", + "title": "krennSynovitisScore", + "type": "number" + }, + "Notes": { + "description": "Please use this attribute to captured pertinent details not otherwise captured. 
This is an unstructured text entry, please be concise.", + "title": "notes", + "type": "string" + }, + "ParentBiospecimenID": { + "description": "The biospecimenID associated with the originating biospecimen for derived or child biospecimens.", + "title": "parentBiospecimenID", + "type": "string" + }, + "PrimaryCellSource": { + "description": "A label indicating the biological source material from which a primary cell culture was derived.", + "enum": [ + "PBMCs", + "kidney", + "pannus-derived dermis", + "pannus-derived epidermis", + "salivary gland", + "synovial tissue", + "total leukocytes", + "urine", + "uvea", + "whole blood" + ], + "title": "primaryCellSource" + }, + "Program": { + "description": "Name of the funding program that supported the generation of data and associated files", + "enum": [ + "AMP AIM", + "AMP RA/SLE", + "Community Contribution" + ], + "title": "program" + }, + "Project": { + "description": "A sub-level attribute of `program` specifying a research initiative working to investigate particular hypotheses.", + "enum": [ + "AIM for RA", + "ELLIPSS", + "LOCKIT", + "RA", + "SLE", + "STAMP", + "UMass V-CoRT" + ], + "title": "project" + }, + "SalivaCollectionProcedure": { + "description": "Classification of saliva collection procedure. 
This is a conditionally dependent attribute triggered for saliva `biospecimenType`.", + "enum": [ + "stimulated", + "unstimulated" + ], + "title": "salivaCollectionProcedure" + }, + "SampleCollectionBatch": { + "description": "A label indicating batching of sample collection or experiment execution that occurs prior to data collection.", + "title": "sampleCollectionBatch", + "type": "string" + }, + "SkinSiteStatus": { + "description": "Disease manifestation status of skin biospecimen.", + "enum": [ + "healthy control", + "lesional", + "lesional proximal", + "non-lesional" + ], + "title": "skinSiteStatus" + }, + "SynovialCollectionProcedure": { + "description": "Classification of procedure for synovial tissue collection.", + "enum": [ + "arthroplasty", + "biopsy", + "synovectomy", + "unknown" + ], + "title": "synovialCollectionProcedure" + }, + "UserDefinedCellType": { + "description": "User-defined label of a cell type. Contributors are provided this option to use preferred or custom cell type labels that may vary from options and standards set by Cell Ontology.", + "title": "userDefinedCellType", + "type": "string" + }, + "VisitID": { + "description": "Ordinal ID distinguishing different patient visits.", + "title": "visitID", + "type": "string" + } + }, + "required": [ + "BiospecimenID", + "BiospecimenType", + "Component", + "IndividualID", + "Program", + "Project" + ], + "title": "BiospecimenMetadataTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.BulkATAC-seqAssayMetadataTemplate.schema.json b/model_json_schema/ark.BulkATAC-seqAssayMetadataTemplate.schema.json index eb7004a2..8caf872b 100644 --- a/model_json_schema/ark.BulkATAC-seqAssayMetadataTemplate.schema.json +++ b/model_json_schema/ark.BulkATAC-seqAssayMetadataTemplate.schema.json @@ -1,330 +1,332 @@ { + "$id": "http://example.com/BulkATAC-seqAssayMetadataTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - 
"title": "ark", - "type": "object", - "properties": { - "totalReads": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "specimenModality": { - "enum": [ - "unknown", - "single specimen", - "multispecimen" - ] - }, - "softwareAndVersion": { - "enum": [ - "Cell Ranger v6.0.1", - "Space Ranger 3.1.0", - "Cell Ranger v5.0.1", - "Cell Ranger v9.0.0", - "Cell Ranger v6.0.0", - "Cell Ranger v7.1.0", - "Cell Ranger v3.0.1", - "Space Ranger 3.1.1", - "Cell Ranger v5.0.0", - "Cell Ranger v6.1.2", - "Cell Ranger v8.0.1", - "Cell Ranger ATAC v1.1.0", - "Space Ranger 3.0.1", - "Space Ranger 3.1.3", - "BD FACSDiva 8.0.1", - "Cell Ranger v7.2.0", - "demuxlet", - "Cell Ranger v6.1.0", - "Cell Ranger v3.0.0", - "Cell Ranger v7.0.0", - "Cell Ranger v4.0.0", - "Cell Ranger v3.1.0", - "Space Ranger 3.0.0", - "Cell Ranger 9.0.1", - "Cell Ranger v7.0.1", - "Cell Ranger v6.1.1", - "Cell Ranger v6.0.2", - "Cell Ranger v8.0.0", - "Space Ranger 3.1.2", - "Cell Ranger v3.0.2", - "" - ] - }, - "libraryPrepMethod": { - "enum": [ - "SMART-Seq Human BCR with UMI", - "NEBNext Human Immune Sequencing Kit", - "10x Chromium Next GEM Single Cell 5' v1.1", - "Chromium Next GEM Single Cell ATAC v1.1", - "10x Chromium GEM-X Single Cell 5' v3", - "10x Chromium Fixed RNA Human Transcriptome", - "QIAseq miRNA Library", - "CEL-Seq2", - "Nextera XT", - "Takara Human BCR profiling for Illumina", - "Takara Human TCRv2 profiling for Illumina", - "SMART-Seq v4 Ultra Low Input RNA", - "custom DASH-treatment", - "10x Chromium Next GEM Single Cell ATAC v2", - "10x Chromium GEM-X Single Cell 3' v4", - "Takara Human scTCR profiling for Illumina", - "in-house library prep", - "10x GEM-X Flex Gene Expression Human", - "10x Chromium Next GEM Single Cell 3'", - "Fluidigm C1 HT", - "TruSeq Stranded mRNA", - "SMARTer Stranded Total RNA v2", - "10x Chromium Single Cell Human TCR", - "10x Chromium Next GEM Single Cell 3' 3.1", - "10x Chromium Single Cell Human BCR", - "Nextera XT DNA", - "10x Chromium Next GEM 
Single Cell 5' v2", - "SMART-Seq Human TCR with UMI", - "Takara Human TCR profiling for Illumina", - "10x GEM-X Universal 5' Gene Expression v3", - "NEBNext Ultra II Directional RNA Library" - ] - }, - "dataCollectionBatch": {}, - "nucleicAcidSource": { - "type": "array", - "items": { - "enum": [ - "TCR mRNA", - "intracellular protein feature barcode", - "Tn5-accessible gDNA", - "antigen capture barcode", - "multiplexing oligo", - "poly(A) RNA", - "rRNA-depleted RNA", - "CRISPR protospacer feature barcode", - "BCR mRNA", - "gDNA", - "surface protein feature barcode", - "globin-depleted RNA" - ] - }, - "maxItems": 12 - }, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "sampleProcessingBatch": {}, - "assay": { - "type": "array", - "items": { - "enum": [ - "Xenium", - "snRNASeq", - "Visium", - "WGS", - "feature barcode sequencing", - "imaging mass cytometry", - "LC-MS/MS", - "NULISA", - "VDJSeq", - "Olink Target 96", - "WES", - "kiloplex", - "ASAPSeq", - "flow cytometry", - "snATACSeq", - "SomaScan", - "GenePS SeqFISH", - "Olink Reveal", - "Olink Explore HT", - "RNASeq", - "Olink Target 48", - "imaging mass spectrometry", - "CITESeq", - "CyTOF", - "SNP array", - "scVDJSeq", - "scRNASeq", - "CE-MS", - "Olink Flex", - "CosMX", - "serial IHC", - "Olink Focus", - "multiplexed ELISA", - "H&E" - ] - }, - "maxItems": 34 - }, - "alignmentReference": { - "enum": [ - "10x Cell Ranger Human GRCh38 2024-A", - "vdj_GRCh38_alts_ensembl-4.0.0", - "modified GRCh38", - "GRCh38", - "unknown", - "10x Cell Ranger Human GRCh38 2020-A" - ] - }, - "platform": { - "type": "array", - "items": { - "enum": [ - "Chromium Xo", - "Illumina NextSeq 500", - "Chromium GEM-X Single Cell 3' Chip v4", - "BD FACSMelody", - "Xenium", - "Chromium Next GEM Chip G", - "Chromium X", - "Fluidigm BioMark", - "BD FACSLyric Clinical", - "unknown", - "GEM-X Flex Gene Expression Chip", - "Helios Mass Cytometer", - "BD FACSAria Fusion cell sorter", - "Hyperion", - "BD FACSDiscover S8", - 
"Chromium Next GEM Chip M", - "Cytek Aurora", - "Not Applicable", - "CyTOF XT", - "Illumina HiSeq X Ten", - "Illumina NovaSeq X", - "Olink Signature Q100", - "Illumina HiSeq 2500", - "BD FACSDiscover A8", - "BD FACSymphony S6", - "Illumina NovaSeq 6000", - "Chromium Next GEM Chip K", - "none", - "Thermo Fisher Attune NxT", - "Chromium Next GEM Chip Q", - "BD FACSAria III", - "Chromium Controller", - "Chromium iX", - "BD FACSCanto", - "Thermo Fisher Attune Xenith", - "Thermo Fisher Attune CytPix", - "Chromium Next GEM Chip H", - "Cytek Aurora Evo", - "BD LSRFortessa", - "Visium CytAssist", - "GEM-X OCM 5' Chip", - "Sony MA900", - "BD FACSCanto II" - ] - }, - "maxItems": 43 - }, - "biospecimenID": {}, - "libraryID": {}, - "10xProbeSetReference": { - "enum": [ - "custom probe set", - "Flex Human Transcriptome Probe Set v1.0.1", - "Visium Human Transcriptome v1", - "Visium Human Transcriptome v2", - "Flex Human Transcriptome Probe Set v1.1.0", - "" - ] - } - }, - "required": [ - "totalReads", - "specimenModality", - "libraryPrepMethod", - "nucleicAcidSource", - "Component", - "assay", - "alignmentReference", - "platform" - ], "allOf": [ { "if": { "properties": { - "specimenModality": { + "LibraryPrepMethod": { "enum": [ - "single specimen" + "10xGEM-XFlexGeneExpressionHuman" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "biospecimenID": { + "10xProbeSetReference": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "biospecimenID" + "10xProbeSetReference" ] } }, { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "multispecimen" + "Multispecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "libraryID": { + "LibraryID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "libraryID" + "LibraryID" ] } }, { "if": { "properties": { - "libraryPrepMethod": { + "SpecimenModality": { "enum": [ - "10x GEM-X Flex Gene 
Expression Human" + "Singlespecimen" ] } - }, - "required": [ - "libraryPrepMethod" - ] + } }, "then": { "properties": { - "10xProbeSetReference": { - "enum": [ - "custom probe set", - "Flex Human Transcriptome Probe Set v1.0.1", - "Visium Human Transcriptome v1", - "Visium Human Transcriptome v2", - "Flex Human Transcriptome Probe Set v1.1.0" - ] + "BiospecimenID": { + "not": { + "type": "null" + } } }, "required": [ - "10xProbeSetReference" + "BiospecimenID" ] } } - ] + ], + "description": "A template outlining metadata to be collected for each library in a bulk ATAC-seq dataset.", + "properties": { + "10xProbeSetReference": { + "description": "Name of probe set used in 10x Chromium Flex, Xenium, or Visium data. If custom modified probe set was used the probe reference should be included as metadata accompanying the experimental data files.", + "enum": [ + "Flex Human Transcriptome Probe Set v1.0.1", + "Flex Human Transcriptome Probe Set v1.1.0", + "Visium Human Transcriptome v1", + "Visium Human Transcriptome v2", + "custom probe set" + ], + "title": "10xProbeSetReference" + }, + "AlignmentReference": { + "description": "The genomic/transcriptomic reference used for performing read alignment against.", + "enum": [ + "10x Cell Ranger Human GRCh38 2020-A", + "10x Cell Ranger Human GRCh38 2024-A", + "GRCh38", + "modified GRCh38", + "unknown", + "vdj_GRCh38_alts_ensembl-4.0.0" + ], + "title": "alignmentReference" + }, + "Assay": { + "description": "The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. 
e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.", + "enum": [ + "ASAPSeq", + "CE-MS", + "CITESeq", + "CosMX", + "CyTOF", + "GenePS SeqFISH", + "H&E", + "LC-MS/MS", + "NULISA", + "Olink Explore HT", + "Olink Flex", + "Olink Focus", + "Olink Reveal", + "Olink Target 48", + "Olink Target 96", + "RNASeq", + "SNP array", + "SomaScan", + "VDJSeq", + "Visium", + "WES", + "WGS", + "Xenium", + "feature barcode sequencing", + "flow cytometry", + "imaging mass cytometry", + "imaging mass spectrometry", + "kiloplex", + "multiplexed ELISA", + "scRNASeq", + "scVDJSeq", + "serial IHC", + "snATACSeq", + "snRNASeq" + ], + "title": "assay" + }, + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.", + "title": "biospecimenID", + "type": "string" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "DataCollectionBatch": { + "description": "A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.", + "title": "dataCollectionBatch", + "type": "string" + }, + "LibraryID": { + "description": "A library label or name, unique within an experiment, used to distinguish sequencing libraries.", + "title": "libraryID", + "type": "string" + }, + "LibraryPrepMethod": { + "description": "Sequencing library preparation method or kit used to create the library. 
If no commercially available kit was used, please select 'in-house library prep'.", + "enum": [ + "10x Chromium Fixed RNA Human Transcriptome", + "10x Chromium GEM-X Single Cell 3' v4", + "10x Chromium GEM-X Single Cell 5' v3", + "10x Chromium Next GEM Single Cell 3'", + "10x Chromium Next GEM Single Cell 3' 3.1", + "10x Chromium Next GEM Single Cell 5' v1.1", + "10x Chromium Next GEM Single Cell 5' v2", + "10x Chromium Next GEM Single Cell ATAC v2", + "10x Chromium Single Cell Human BCR", + "10x Chromium Single Cell Human TCR", + "10x GEM-X Flex Gene Expression Human", + "10x GEM-X Universal 5' Gene Expression v3", + "CEL-Seq2", + "Chromium Next GEM Single Cell ATAC v1.1", + "Fluidigm C1 HT", + "NEBNext Human Immune Sequencing Kit", + "NEBNext Ultra II Directional RNA Library", + "Nextera XT", + "Nextera XT DNA", + "QIAseq miRNA Library", + "SMART-Seq Human BCR with UMI", + "SMART-Seq Human TCR with UMI", + "SMART-Seq v4 Ultra Low Input RNA", + "SMARTer Stranded Total RNA v2", + "Takara Human BCR profiling for Illumina", + "Takara Human TCR profiling for Illumina", + "Takara Human TCRv2 profiling for Illumina", + "Takara Human scTCR profiling for Illumina", + "TruSeq Stranded mRNA", + "custom DASH-treatment", + "in-house library prep" + ], + "title": "libraryPrepMethod" + }, + "NucleicAcidSource": { + "description": "The source of the nucleic acid used as input for sequencing library fragments. Select all that apply, though in most cases only a single label is expected.", + "enum": [ + "BCR mRNA", + "CRISPR protospacer feature barcode", + "TCR mRNA", + "Tn5-accessible gDNA", + "antigen capture barcode", + "gDNA", + "globin-depleted RNA", + "intracellular protein feature barcode", + "multiplexing oligo", + "poly(A) RNA", + "rRNA-depleted RNA", + "surface protein feature barcode" + ], + "title": "nucleicAcidSource" + }, + "Platform": { + "description": "The specific version (manufacturer, model, etc.) 
of a technology that is used to carry out a laboratory or computational experiment. Specify where applicable for experimental data files, else enter 'none'. In most cases only a single label is expected, however multiple selections can be provided in comma-delimited list where applicable e.g., for 10x Genomics fastq files please specify both the 10x instrument and the sequencing platform.", + "enum": [ + "BD FACSAria Fusion cell sorter", + "BD FACSAria III", + "BD FACSCanto", + "BD FACSCanto II", + "BD FACSDiscover A8", + "BD FACSDiscover S8", + "BD FACSLyric Clinical", + "BD FACSMelody", + "BD FACSymphony S6", + "BD LSRFortessa", + "Chromium Controller", + "Chromium GEM-X Single Cell 3' Chip v4", + "Chromium Next GEM Chip G", + "Chromium Next GEM Chip H", + "Chromium Next GEM Chip K", + "Chromium Next GEM Chip M", + "Chromium Next GEM Chip Q", + "Chromium X", + "Chromium Xo", + "Chromium iX", + "CyTOF XT", + "Cytek Aurora", + "Cytek Aurora Evo", + "Fluidigm BioMark", + "GEM-X Flex Gene Expression Chip", + "GEM-X OCM 5' Chip", + "Helios Mass Cytometer", + "Hyperion", + "Illumina HiSeq 2500", + "Illumina HiSeq X Ten", + "Illumina NextSeq 500", + "Illumina NovaSeq 6000", + "Illumina NovaSeq X", + "Not Applicable", + "Olink Signature Q100", + "Sony MA900", + "Thermo Fisher Attune CytPix", + "Thermo Fisher Attune NxT", + "Thermo Fisher Attune Xenith", + "Visium CytAssist", + "Xenium", + "none", + "unknown" + ], + "title": "platform" + }, + "SampleProcessingBatch": { + "description": "A label indicating batching of sample processing or preparation that occurs prior to data collection.", + "title": "sampleProcessingBatch", + "type": "string" + }, + "SoftwareAndVersion": { + "description": "Relevant software and version used to generate the data file.", + "enum": [ + "BD FACSDiva 8.0.1", + "Cell Ranger 9.0.1", + "Cell Ranger ATAC v1.1.0", + "Cell Ranger v3.0.0", + "Cell Ranger v3.0.1", + "Cell Ranger v3.0.2", + "Cell Ranger v3.1.0", + "Cell Ranger v4.0.0", + "Cell Ranger 
v5.0.0", + "Cell Ranger v5.0.1", + "Cell Ranger v6.0.0", + "Cell Ranger v6.0.1", + "Cell Ranger v6.0.2", + "Cell Ranger v6.1.0", + "Cell Ranger v6.1.1", + "Cell Ranger v6.1.2", + "Cell Ranger v7.0.0", + "Cell Ranger v7.0.1", + "Cell Ranger v7.1.0", + "Cell Ranger v7.2.0", + "Cell Ranger v8.0.0", + "Cell Ranger v8.0.1", + "Cell Ranger v9.0.0", + "Space Ranger 3.0.0", + "Space Ranger 3.0.1", + "Space Ranger 3.1.0", + "Space Ranger 3.1.1", + "Space Ranger 3.1.2", + "Space Ranger 3.1.3", + "demuxlet" + ], + "title": "softwareAndVersion" + }, + "SpecimenModality": { + "description": "Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens", + "enum": [ + "multispecimen", + "single specimen", + "unknown" + ], + "title": "specimenModality" + }, + "TotalReads": { + "description": "Total number of reads sequenced from the library.", + "title": "totalReads", + "type": "integer" + } + }, + "required": [ + "AlignmentReference", + "Assay", + "Component", + "LibraryPrepMethod", + "NucleicAcidSource", + "Platform", + "SpecimenModality", + "TotalReads" + ], + "title": "BulkATAC-seqAssayMetadataTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.BulkRNA-seqAssayMetadataTemplate.schema.json b/model_json_schema/ark.BulkRNA-seqAssayMetadataTemplate.schema.json index 551e6af0..ccc0dbe4 100644 --- a/model_json_schema/ark.BulkRNA-seqAssayMetadataTemplate.schema.json +++ b/model_json_schema/ark.BulkRNA-seqAssayMetadataTemplate.schema.json @@ -1,330 +1,332 @@ { + "$id": "http://example.com/BulkRNA-seqAssayMetadataTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", - "properties": { - "dataCollectionBatch": {}, - "sampleProcessingBatch": {}, - "specimenModality": { - "enum": [ - "unknown", - "single specimen", - "multispecimen" - ] - }, - "Component": { - "not": { - "type": "null" - }, - 
"minLength": 1 - }, - "libraryPrepMethod": { - "enum": [ - "Nextera XT", - "Chromium Next GEM Single Cell ATAC v1.1", - "Takara Human TCR profiling for Illumina", - "10x GEM-X Universal 5' Gene Expression v3", - "10x Chromium Next GEM Single Cell 3'", - "NEBNext Human Immune Sequencing Kit", - "10x Chromium Single Cell Human TCR", - "10x Chromium Next GEM Single Cell 5' v1.1", - "10x Chromium Next GEM Single Cell 3' 3.1", - "TruSeq Stranded mRNA", - "custom DASH-treatment", - "SMART-Seq Human BCR with UMI", - "10x Chromium GEM-X Single Cell 3' v4", - "Takara Human TCRv2 profiling for Illumina", - "SMART-Seq v4 Ultra Low Input RNA", - "SMARTer Stranded Total RNA v2", - "SMART-Seq Human TCR with UMI", - "Nextera XT DNA", - "Takara Human BCR profiling for Illumina", - "in-house library prep", - "10x Chromium Next GEM Single Cell ATAC v2", - "NEBNext Ultra II Directional RNA Library", - "CEL-Seq2", - "Fluidigm C1 HT", - "QIAseq miRNA Library", - "10x GEM-X Flex Gene Expression Human", - "10x Chromium GEM-X Single Cell 5' v3", - "Takara Human scTCR profiling for Illumina", - "10x Chromium Single Cell Human BCR", - "10x Chromium Fixed RNA Human Transcriptome", - "10x Chromium Next GEM Single Cell 5' v2" - ] - }, - "platform": { - "type": "array", - "items": { - "enum": [ - "Visium CytAssist", - "BD FACSCanto II", - "BD FACSMelody", - "Chromium Xo", - "Helios Mass Cytometer", - "Not Applicable", - "Chromium iX", - "Thermo Fisher Attune NxT", - "none", - "BD FACSAria III", - "BD FACSymphony S6", - "BD FACSCanto", - "Chromium Next GEM Chip H", - "Xenium", - "Cytek Aurora Evo", - "Cytek Aurora", - "Chromium Next GEM Chip M", - "GEM-X Flex Gene Expression Chip", - "BD FACSAria Fusion cell sorter", - "Olink Signature Q100", - "GEM-X OCM 5' Chip", - "Illumina NextSeq 500", - "Thermo Fisher Attune Xenith", - "unknown", - "Hyperion", - "BD FACSDiscover A8", - "Chromium Next GEM Chip G", - "Chromium Next GEM Chip Q", - "Chromium X", - "Illumina NovaSeq X", - "Illumina HiSeq X 
Ten", - "Fluidigm BioMark", - "BD FACSLyric Clinical", - "CyTOF XT", - "Sony MA900", - "Thermo Fisher Attune CytPix", - "Illumina NovaSeq 6000", - "Chromium GEM-X Single Cell 3' Chip v4", - "BD LSRFortessa", - "BD FACSDiscover S8", - "Chromium Controller", - "Illumina HiSeq 2500", - "Chromium Next GEM Chip K" - ] - }, - "maxItems": 43 - }, - "softwareAndVersion": { - "enum": [ - "BD FACSDiva 8.0.1", - "Cell Ranger v8.0.0", - "Cell Ranger v6.0.0", - "Cell Ranger v6.1.1", - "Space Ranger 3.0.1", - "Space Ranger 3.1.2", - "Cell Ranger v7.0.0", - "Cell Ranger v6.1.0", - "Cell Ranger v5.0.1", - "Cell Ranger ATAC v1.1.0", - "Cell Ranger v3.1.0", - "demuxlet", - "Cell Ranger v7.2.0", - "Cell Ranger v3.0.0", - "Space Ranger 3.0.0", - "Cell Ranger v9.0.0", - "Cell Ranger 9.0.1", - "Cell Ranger v7.1.0", - "Cell Ranger v4.0.0", - "Cell Ranger v3.0.2", - "Cell Ranger v3.0.1", - "Cell Ranger v6.0.1", - "Cell Ranger v5.0.0", - "Space Ranger 3.1.0", - "Cell Ranger v6.0.2", - "Cell Ranger v6.1.2", - "Cell Ranger v7.0.1", - "Cell Ranger v8.0.1", - "Space Ranger 3.1.3", - "Space Ranger 3.1.1", - "" - ] - }, - "totalReads": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "alignmentReference": { - "enum": [ - "vdj_GRCh38_alts_ensembl-4.0.0", - "modified GRCh38", - "GRCh38", - "unknown", - "10x Cell Ranger Human GRCh38 2024-A", - "10x Cell Ranger Human GRCh38 2020-A" - ] - }, - "nucleicAcidSource": { - "type": "array", - "items": { - "enum": [ - "gDNA", - "Tn5-accessible gDNA", - "TCR mRNA", - "intracellular protein feature barcode", - "rRNA-depleted RNA", - "CRISPR protospacer feature barcode", - "BCR mRNA", - "poly(A) RNA", - "antigen capture barcode", - "surface protein feature barcode", - "multiplexing oligo", - "globin-depleted RNA" - ] - }, - "maxItems": 12 - }, - "assay": { - "type": "array", - "items": { - "enum": [ - "CyTOF", - "Visium", - "imaging mass cytometry", - "SomaScan", - "GenePS SeqFISH", - "Olink Target 96", - "Olink Flex", - "RNASeq", - "Olink Target 
48", - "Xenium", - "Olink Explore HT", - "Olink Focus", - "ASAPSeq", - "snRNASeq", - "imaging mass spectrometry", - "H&E", - "feature barcode sequencing", - "serial IHC", - "scVDJSeq", - "scRNASeq", - "LC-MS/MS", - "SNP array", - "flow cytometry", - "NULISA", - "WES", - "WGS", - "multiplexed ELISA", - "kiloplex", - "snATACSeq", - "CITESeq", - "CE-MS", - "Olink Reveal", - "CosMX", - "VDJSeq" - ] - }, - "maxItems": 34 - }, - "biospecimenID": {}, - "libraryID": {}, - "10xProbeSetReference": { - "enum": [ - "custom probe set", - "Flex Human Transcriptome Probe Set v1.0.1", - "Visium Human Transcriptome v2", - "Flex Human Transcriptome Probe Set v1.1.0", - "Visium Human Transcriptome v1", - "" - ] - } - }, - "required": [ - "specimenModality", - "Component", - "libraryPrepMethod", - "platform", - "totalReads", - "alignmentReference", - "nucleicAcidSource", - "assay" - ], "allOf": [ { "if": { "properties": { - "specimenModality": { + "LibraryPrepMethod": { "enum": [ - "single specimen" + "10xGEM-XFlexGeneExpressionHuman" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "biospecimenID": { + "10xProbeSetReference": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "biospecimenID" + "10xProbeSetReference" ] } }, { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "multispecimen" + "Multispecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "libraryID": { + "LibraryID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "libraryID" + "LibraryID" ] } }, { "if": { "properties": { - "libraryPrepMethod": { + "SpecimenModality": { "enum": [ - "10x GEM-X Flex Gene Expression Human" + "Singlespecimen" ] } - }, - "required": [ - "libraryPrepMethod" - ] + } }, "then": { "properties": { - "10xProbeSetReference": { - "enum": [ - "custom probe set", - "Flex Human Transcriptome Probe Set v1.0.1", - "Visium Human Transcriptome v2", - 
"Flex Human Transcriptome Probe Set v1.1.0", - "Visium Human Transcriptome v1" - ] + "BiospecimenID": { + "not": { + "type": "null" + } } }, "required": [ - "10xProbeSetReference" + "BiospecimenID" ] } } - ] + ], + "description": "A template outlining metadata to be collected for each library in a bulk RNA-seq dataset.", + "properties": { + "10xProbeSetReference": { + "description": "Name of probe set used in 10x Chromium Flex, Xenium, or Visium data. If custom modified probe set was used the probe reference should be included as metadata accompanying the experimental data files.", + "enum": [ + "Flex Human Transcriptome Probe Set v1.0.1", + "Flex Human Transcriptome Probe Set v1.1.0", + "Visium Human Transcriptome v1", + "Visium Human Transcriptome v2", + "custom probe set" + ], + "title": "10xProbeSetReference" + }, + "AlignmentReference": { + "description": "The genomic/transcriptomic reference used for performing read alignment against.", + "enum": [ + "10x Cell Ranger Human GRCh38 2020-A", + "10x Cell Ranger Human GRCh38 2024-A", + "GRCh38", + "modified GRCh38", + "unknown", + "vdj_GRCh38_alts_ensembl-4.0.0" + ], + "title": "alignmentReference" + }, + "Assay": { + "description": "The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. 
e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.", + "enum": [ + "ASAPSeq", + "CE-MS", + "CITESeq", + "CosMX", + "CyTOF", + "GenePS SeqFISH", + "H&E", + "LC-MS/MS", + "NULISA", + "Olink Explore HT", + "Olink Flex", + "Olink Focus", + "Olink Reveal", + "Olink Target 48", + "Olink Target 96", + "RNASeq", + "SNP array", + "SomaScan", + "VDJSeq", + "Visium", + "WES", + "WGS", + "Xenium", + "feature barcode sequencing", + "flow cytometry", + "imaging mass cytometry", + "imaging mass spectrometry", + "kiloplex", + "multiplexed ELISA", + "scRNASeq", + "scVDJSeq", + "serial IHC", + "snATACSeq", + "snRNASeq" + ], + "title": "assay" + }, + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.", + "title": "biospecimenID", + "type": "string" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "DataCollectionBatch": { + "description": "A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.", + "title": "dataCollectionBatch", + "type": "string" + }, + "LibraryID": { + "description": "A library label or name, unique within an experiment, used to distinguish sequencing libraries.", + "title": "libraryID", + "type": "string" + }, + "LibraryPrepMethod": { + "description": "Sequencing library preparation method or kit used to create the library. 
If no commercially available kit was used, please select 'in-house library prep'.", + "enum": [ + "10x Chromium Fixed RNA Human Transcriptome", + "10x Chromium GEM-X Single Cell 3' v4", + "10x Chromium GEM-X Single Cell 5' v3", + "10x Chromium Next GEM Single Cell 3'", + "10x Chromium Next GEM Single Cell 3' 3.1", + "10x Chromium Next GEM Single Cell 5' v1.1", + "10x Chromium Next GEM Single Cell 5' v2", + "10x Chromium Next GEM Single Cell ATAC v2", + "10x Chromium Single Cell Human BCR", + "10x Chromium Single Cell Human TCR", + "10x GEM-X Flex Gene Expression Human", + "10x GEM-X Universal 5' Gene Expression v3", + "CEL-Seq2", + "Chromium Next GEM Single Cell ATAC v1.1", + "Fluidigm C1 HT", + "NEBNext Human Immune Sequencing Kit", + "NEBNext Ultra II Directional RNA Library", + "Nextera XT", + "Nextera XT DNA", + "QIAseq miRNA Library", + "SMART-Seq Human BCR with UMI", + "SMART-Seq Human TCR with UMI", + "SMART-Seq v4 Ultra Low Input RNA", + "SMARTer Stranded Total RNA v2", + "Takara Human BCR profiling for Illumina", + "Takara Human TCR profiling for Illumina", + "Takara Human TCRv2 profiling for Illumina", + "Takara Human scTCR profiling for Illumina", + "TruSeq Stranded mRNA", + "custom DASH-treatment", + "in-house library prep" + ], + "title": "libraryPrepMethod" + }, + "NucleicAcidSource": { + "description": "The source of the nucleic acid used as input for sequencing library fragments. Select all that apply, though in most cases only a single label is expected.", + "enum": [ + "BCR mRNA", + "CRISPR protospacer feature barcode", + "TCR mRNA", + "Tn5-accessible gDNA", + "antigen capture barcode", + "gDNA", + "globin-depleted RNA", + "intracellular protein feature barcode", + "multiplexing oligo", + "poly(A) RNA", + "rRNA-depleted RNA", + "surface protein feature barcode" + ], + "title": "nucleicAcidSource" + }, + "Platform": { + "description": "The specific version (manufacturer, model, etc.) 
of a technology that is used to carry out a laboratory or computational experiment. Specify where applicable for experimental data files, else enter 'none'. In most cases only a single label is expected, however multiple selections can be provided in comma-delimited list where applicable e.g., for 10x Genomics fastq files please specify both the 10x instrument and the sequencing platform.", + "enum": [ + "BD FACSAria Fusion cell sorter", + "BD FACSAria III", + "BD FACSCanto", + "BD FACSCanto II", + "BD FACSDiscover A8", + "BD FACSDiscover S8", + "BD FACSLyric Clinical", + "BD FACSMelody", + "BD FACSymphony S6", + "BD LSRFortessa", + "Chromium Controller", + "Chromium GEM-X Single Cell 3' Chip v4", + "Chromium Next GEM Chip G", + "Chromium Next GEM Chip H", + "Chromium Next GEM Chip K", + "Chromium Next GEM Chip M", + "Chromium Next GEM Chip Q", + "Chromium X", + "Chromium Xo", + "Chromium iX", + "CyTOF XT", + "Cytek Aurora", + "Cytek Aurora Evo", + "Fluidigm BioMark", + "GEM-X Flex Gene Expression Chip", + "GEM-X OCM 5' Chip", + "Helios Mass Cytometer", + "Hyperion", + "Illumina HiSeq 2500", + "Illumina HiSeq X Ten", + "Illumina NextSeq 500", + "Illumina NovaSeq 6000", + "Illumina NovaSeq X", + "Not Applicable", + "Olink Signature Q100", + "Sony MA900", + "Thermo Fisher Attune CytPix", + "Thermo Fisher Attune NxT", + "Thermo Fisher Attune Xenith", + "Visium CytAssist", + "Xenium", + "none", + "unknown" + ], + "title": "platform" + }, + "SampleProcessingBatch": { + "description": "A label indicating batching of sample processing or preparation that occurs prior to data collection.", + "title": "sampleProcessingBatch", + "type": "string" + }, + "SoftwareAndVersion": { + "description": "Relevant software and version used to generate the data file.", + "enum": [ + "BD FACSDiva 8.0.1", + "Cell Ranger 9.0.1", + "Cell Ranger ATAC v1.1.0", + "Cell Ranger v3.0.0", + "Cell Ranger v3.0.1", + "Cell Ranger v3.0.2", + "Cell Ranger v3.1.0", + "Cell Ranger v4.0.0", + "Cell Ranger 
v5.0.0", + "Cell Ranger v5.0.1", + "Cell Ranger v6.0.0", + "Cell Ranger v6.0.1", + "Cell Ranger v6.0.2", + "Cell Ranger v6.1.0", + "Cell Ranger v6.1.1", + "Cell Ranger v6.1.2", + "Cell Ranger v7.0.0", + "Cell Ranger v7.0.1", + "Cell Ranger v7.1.0", + "Cell Ranger v7.2.0", + "Cell Ranger v8.0.0", + "Cell Ranger v8.0.1", + "Cell Ranger v9.0.0", + "Space Ranger 3.0.0", + "Space Ranger 3.0.1", + "Space Ranger 3.1.0", + "Space Ranger 3.1.1", + "Space Ranger 3.1.2", + "Space Ranger 3.1.3", + "demuxlet" + ], + "title": "softwareAndVersion" + }, + "SpecimenModality": { + "description": "Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens", + "enum": [ + "multispecimen", + "single specimen", + "unknown" + ], + "title": "specimenModality" + }, + "TotalReads": { + "description": "Total number of reads sequenced from the library.", + "title": "totalReads", + "type": "integer" + } + }, + "required": [ + "AlignmentReference", + "Assay", + "Component", + "LibraryPrepMethod", + "NucleicAcidSource", + "Platform", + "SpecimenModality", + "TotalReads" + ], + "title": "BulkRNA-seqAssayMetadataTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.ClinicalMetadataTemplate.schema.json b/model_json_schema/ark.ClinicalMetadataTemplate.schema.json index 657f453e..882a0024 100644 --- a/model_json_schema/ark.ClinicalMetadataTemplate.schema.json +++ b/model_json_schema/ark.ClinicalMetadataTemplate.schema.json @@ -1,242 +1,47 @@ { + "$id": "http://example.com/ClinicalMetadataTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", - "properties": { - "heightUnits": { - "enum": [ - "centimeters", - "feet", - "inches", - "meters" - ] - }, - "comorbidities": { - "type": "array", - "items": { - "enum": [ - "autoimmune thyroid disease", - "multiple sclerosis", - "psoriasis", - "systemic lupus 
erythematosus", - "inflammatory bowel disease", - "psoriatic arthritis", - "diabetes", - "rheumatoid arthritis", - "Hashimoto's Thyroiditis", - "pulmonary disease", - "other", - "cardiovascular disease", - "" - ] - }, - "maxItems": 12 - }, - "age": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "ethnicity": { - "enum": [ - "Hispanic or Latino", - "unknown", - "Not Hispanic or Latino", - "" - ] - }, - "ageUnits": { - "enum": [ - "years", - "months" - ] - }, - "weight": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "program": { - "enum": [ - "AMP AIM", - "AMP RA/SLE", - "Community Contribution" - ] - }, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "species": { - "enum": [ - "Homo sapiens" - ] - }, - "individualID": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "sex": { - "enum": [ - "intersex", - "unknown", - "female", - "male" - ] - }, - "race": { - "enum": [ - "White", - "Hispanic", - "Mixed Race", - "American Indian or Alaska Native", - "Asian", - "unknown", - "other", - "Native Hawaiian or Other Pacific Islander", - "Black or African American", - "" - ] - }, - "weightUnits": { - "enum": [ - "g", - "kg", - "oz", - "lb" - ] - }, - "project": { - "type": "array", - "items": { - "enum": [ - "AIM for RA", - "STAMP", - "LOCKIT", - "SLE", - "UMass V-CoRT", - "ELLIPSS", - "RA" - ] - }, - "maxItems": 7 - }, - "height": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "diagnosis": { - "type": "array", - "items": { - "enum": [ - "dermatomyositis", - "Sjogren's disease", - "cutaneous lupus erythematosus", - "psoriasis", - "vitiligo", - "scleroderma", - "control", - "At-Risk RA", - "OA", - "psoriatic arthritis", - "unknown", - "lupus nephritis", - "SLE", - "discoid lupus erythematosus", - "Not Applicable", - "RA" - ] - }, - "maxItems": 16 - }, - "PASI": {}, - "diabetesType": { - "enum": [ - "gestational", - "unknown", - "type 1", - "type 2", - "" - ] - }, - "visitID": {}, - "CDASI": {}, - "VASI": {}, 
- "VETI": {}, - "vitiligoPattern": { - "enum": [ - "mixed", - "unclassified", - "non-segmental", - "segmental", - "" - ] - }, - "VIDA": {} - }, - "required": [ - "heightUnits", - "age", - "ageUnits", - "weight", - "program", - "Component", - "species", - "individualID", - "sex", - "weightUnits", - "project", - "height", - "diagnosis" - ], "allOf": [ { "if": { "properties": { - "diagnosis": { + "Comorbidities": { "enum": [ - "psoriasis" + "Diabetes" ] } - }, - "required": [ - "diagnosis" - ] + } }, "then": { "properties": { - "PASI": {} + "DiabetesType": { + "not": { + "type": "null" + } + } }, "required": [ - "PASI" + "DiabetesType" ] } }, { "if": { "properties": { - "diagnosis": { + "Comorbidities": { "enum": [ - "psoriasis" + "Psoriasis" ] } - }, - "required": [ - "diagnosis" - ] + } }, "then": { "properties": { - "PASI": {} + "PASI": { + "not": { + "type": "null" + } + } }, "required": [ "PASI" @@ -246,19 +51,20 @@ { "if": { "properties": { - "diagnosis": { + "Diagnosis": { "enum": [ - "psoriasis" + "Psoriasis" ] } - }, - "required": [ - "diagnosis" - ] + } }, "then": { "properties": { - "PASI": {} + "PASI": { + "not": { + "type": "null" + } + } }, "required": [ "PASI" @@ -268,19 +74,20 @@ { "if": { "properties": { - "diagnosis": { + "Comorbidities": { "enum": [ - "psoriasis" + "Psoriasis" ] } - }, - "required": [ - "diagnosis" - ] + } }, "then": { "properties": { - "PASI": {} + "PASI": { + "not": { + "type": "null" + } + } }, "required": [ "PASI" @@ -290,204 +97,412 @@ { "if": { "properties": { - "comorbidities": { + "Diagnosis": { "enum": [ - "diabetes" + "Psoriasis" ] } - }, - "required": [ - "comorbidities" - ] + } }, "then": { "properties": { - "diabetesType": { - "enum": [ - "gestational", - "unknown", - "type 1", - "type 2", - "" - ] + "PASI": { + "not": { + "type": "null" + } } }, "required": [ - "diabetesType" + "PASI" ] } }, { "if": { "properties": { - "program": { + "Diagnosis": { "enum": [ - "AMP AIM" + "Dermatomyositis" ] } - }, - "required": [ - 
"program" - ] + } }, "then": { "properties": { - "visitID": { + "CDASI": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "visitID" + "CDASI" ] } }, { "if": { "properties": { - "program": { + "Diagnosis": { "enum": [ - "AMP RA/SLE" + "Vitiligo" ] } - }, - "required": [ - "program" - ] + } }, "then": { "properties": { - "visitID": { + "VASI": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "visitID" + "VASI" ] } }, { "if": { "properties": { - "diagnosis": { + "Diagnosis": { "enum": [ - "dermatomyositis" + "Vitiligo" ] } - }, - "required": [ - "diagnosis" - ] + } }, "then": { "properties": { - "CDASI": {} + "VETI": { + "not": { + "type": "null" + } + } }, "required": [ - "CDASI" + "VETI" ] } }, { "if": { "properties": { - "diagnosis": { + "Diagnosis": { "enum": [ - "vitiligo" + "Vitiligo" ] } - }, - "required": [ - "diagnosis" - ] + } }, "then": { "properties": { - "VASI": {} + "VIDA": { + "not": { + "type": "null" + } + } }, "required": [ - "VASI" + "VIDA" ] } }, { "if": { "properties": { - "diagnosis": { + "Diagnosis": { "enum": [ - "vitiligo" + "Vitiligo" ] } - }, - "required": [ - "diagnosis" - ] + } }, "then": { "properties": { - "VETI": {} + "VitiligoPattern": { + "not": { + "type": "null" + } + } }, "required": [ - "VETI" + "VitiligoPattern" ] } }, { "if": { "properties": { - "diagnosis": { + "Program": { "enum": [ - "vitiligo" + "AMPAIM" ] } - }, - "required": [ - "diagnosis" - ] + } }, "then": { "properties": { - "vitiligoPattern": { - "enum": [ - "mixed", - "unclassified", - "non-segmental", - "segmental", - "" - ] + "VisitID": { + "not": { + "type": "null" + } } }, "required": [ - "vitiligoPattern" + "VisitID" ] } }, { "if": { "properties": { - "diagnosis": { + "Program": { "enum": [ - "vitiligo" + "AMPRA/SLE" ] } - }, - "required": [ - "diagnosis" - ] + } }, "then": { "properties": { - "VIDA": {} + "VisitID": { + "not": { + "type": "null" + } + } }, "required": [ - "VIDA" + "VisitID" ] } } - ] + ], + 
"description": "A template outlining clinical metadata to collect for study subjects.", + "properties": { + "Age": { + "description": "Age at which subject was enrolled in study or age at corresponding visit and data collection event. If value unknown, enter '-1'.", + "title": "age", + "type": "number" + }, + "AgeUnits": { + "description": "The unit of measure used for `ageEnrollment` and `ageDiagnosis`", + "enum": [ + "months", + "years" + ], + "title": "ageUnits" + }, + "CDASI": { + "description": "Cutaneous Dermatomyositis Disease Area and Severity Index, is a measurement used to characterize cutaneous dermatomyositis severity.", + "title": "CDASI", + "type": "string" + }, + "Comorbidities": { + "description": "Any diseases or medical condition that is simultaneously present in addition to `diagnosis`.", + "enum": [ + "Hashimoto's Thyroiditis", + "autoimmune thyroid disease", + "cardiovascular disease", + "diabetes", + "inflammatory bowel disease", + "multiple sclerosis", + "other", + "psoriasis", + "psoriatic arthritis", + "pulmonary disease", + "rheumatoid arthritis", + "systemic lupus erythematosus" + ], + "title": "comorbidities" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "DiabetesType": { + "description": "Type of diabetes mellitus.", + "enum": [ + "gestational", + "type 1", + "type 2", + "unknown" + ], + "title": "diabetesType" + }, + "Diagnosis": { + "description": "A high-level classifier indicating the disease status of an individual.", + "enum": [ + "At-Risk RA", + "Not Applicable", + "OA", + "RA", + "SLE", + "Sjogren's disease", + "control", + "cutaneous lupus erythematosus", + "dermatomyositis", + "discoid lupus erythematosus", + "lupus nephritis", + "psoriasis", + "psoriatic arthritis", + "scleroderma", + "unknown", + "vitiligo" + ], + "title": "diagnosis" + }, + "Ethnicity": { + "description": "The ethnicity of a person.", + "enum": [ + 
"Hispanic or Latino", + "Not Hispanic or Latino", + "unknown" + ], + "title": "ethnicity" + }, + "Height": { + "description": "Standing height of subject.", + "title": "height", + "type": "number" + }, + "HeightUnits": { + "description": "Unit of measure of value provided for `height`.", + "enum": [ + "centimeters", + "feet", + "inches", + "meters" + ], + "title": "heightUnits" + }, + "IndividualID": { + "description": "Unique identifier assigned to each study participant. For multi-specimen data files provide all IDs in a comma-separated list.", + "title": "individualID", + "type": "string" + }, + "PASI": { + "description": "Psoriasis Area and Severity Index, is a measurement of the discoloration, thickness, scaling, and coverage of psoriasis plaques.", + "title": "PASI", + "type": "string" + }, + "Program": { + "description": "Name of the funding program that supported the generation of data and associated files", + "enum": [ + "AMP AIM", + "AMP RA/SLE", + "Community Contribution" + ], + "title": "program" + }, + "Project": { + "description": "A sub-level attribute of `program` specifying a research initiative working to investigate particular hypotheses.", + "enum": [ + "AIM for RA", + "ELLIPSS", + "LOCKIT", + "RA", + "SLE", + "STAMP", + "UMass V-CoRT" + ], + "title": "project" + }, + "Race": { + "description": "A textual description of a person's race.", + "enum": [ + "American Indian or Alaska Native", + "Asian", + "Black or African American", + "Hispanic", + "Mixed Race", + "Native Hawaiian or Other Pacific Islander", + "White", + "other", + "unknown" + ], + "title": "race" + }, + "Sex": { + "description": "A textual description of a person's sex at birth.", + "enum": [ + "female", + "intersex", + "male", + "unknown" + ], + "title": "sex" + }, + "Species": { + "description": "The genus species of sample or subject origin.", + "enum": [ + "Homo sapiens" + ], + "title": "species" + }, + "VASI": { + "description": "Total body Vitiligo Area Severity Index, a 
measure of the percentage of vitiligo involvement across the body calculated in terms of hand units.", + "title": "VASI", + "type": "string" + }, + "VETI": { + "description": "Vitiligo Extent Tensity Index measures the extent of vitiligo by a numerical score and combines analysis of extensity and severity of vitiligo to produce a constant and reproducible number.", + "title": "VETI", + "type": "number" + }, + "VIDA": { + "description": "Vitiligo Disease Activity Score, a six-point scale that evaluates the activity of vitiligo", + "title": "VIDA", + "type": "string" + }, + "VisitID": { + "description": "Ordinal ID distinguishing different patient visits.", + "title": "visitID", + "type": "string" + }, + "VitiligoPattern": { + "description": "A high-level classification of vitiligo based on the distribution and patterning of lesions across the body.", + "enum": [ + "mixed", + "non-segmental", + "segmental", + "unclassified" + ], + "title": "vitiligoPattern" + }, + "Weight": { + "description": "Weight of subject. 
If value unknown, enter '-1'.", + "title": "weight", + "type": "number" + }, + "WeightUnits": { + "description": "Abbreviated unit of measure of value provided for `weight`.", + "enum": [ + "g", + "kg", + "lb", + "oz" + ], + "title": "weightUnits" + } + }, + "required": [ + "Age", + "AgeUnits", + "Component", + "Diagnosis", + "Height", + "HeightUnits", + "IndividualID", + "Program", + "Project", + "Sex", + "Species", + "Weight", + "WeightUnits" + ], + "title": "ClinicalMetadataTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.CyTOFAssayMetadataTemplate.schema.json b/model_json_schema/ark.CyTOFAssayMetadataTemplate.schema.json index 208a9c2f..7522b0b8 100644 --- a/model_json_schema/ark.CyTOFAssayMetadataTemplate.schema.json +++ b/model_json_schema/ark.CyTOFAssayMetadataTemplate.schema.json @@ -1,171 +1,178 @@ { + "$id": "http://example.com/CyTOFAssayMetadataTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", + "description": "A template outlining assay-related metadata for a cytometry time-of-flight (CyTOF) dataset. Each row corresponds to a biospecimen profiled in the experiment.", "properties": { - "softwareAndVersion": { + "Assay": { + "description": "The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. 
e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.", "enum": [ - "Cell Ranger v6.0.2", - "Cell Ranger v3.0.0", + "ASAPSeq", + "CE-MS", + "CITESeq", + "CosMX", + "CyTOF", + "GenePS SeqFISH", + "H&E", + "LC-MS/MS", + "NULISA", + "Olink Explore HT", + "Olink Flex", + "Olink Focus", + "Olink Reveal", + "Olink Target 48", + "Olink Target 96", + "RNASeq", + "SNP array", + "SomaScan", + "VDJSeq", + "Visium", + "WES", + "WGS", + "Xenium", + "feature barcode sequencing", + "flow cytometry", + "imaging mass cytometry", + "imaging mass spectrometry", + "kiloplex", + "multiplexed ELISA", + "scRNASeq", + "scVDJSeq", + "serial IHC", + "snATACSeq", + "snRNASeq" + ], + "title": "assay" + }, + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.", + "title": "biospecimenID", + "type": "string" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "DataCollectionBatch": { + "description": "A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.", + "title": "dataCollectionBatch", + "type": "string" + }, + "Platform": { + "description": "The specific version (manufacturer, model, etc.) of a technology that is used to carry out a laboratory or computational experiment. Specify where applicable for experimental data files, else enter 'none'. 
In most cases only a single label is expected, however multiple selections can be provided in comma-delimited list where applicable e.g., for 10x Genomics fastq files please specify both the 10x instrument and the sequencing platform.", + "enum": [ + "BD FACSAria Fusion cell sorter", + "BD FACSAria III", + "BD FACSCanto", + "BD FACSCanto II", + "BD FACSDiscover A8", + "BD FACSDiscover S8", + "BD FACSLyric Clinical", + "BD FACSMelody", + "BD FACSymphony S6", + "BD LSRFortessa", + "Chromium Controller", + "Chromium GEM-X Single Cell 3' Chip v4", + "Chromium Next GEM Chip G", + "Chromium Next GEM Chip H", + "Chromium Next GEM Chip K", + "Chromium Next GEM Chip M", + "Chromium Next GEM Chip Q", + "Chromium X", + "Chromium Xo", + "Chromium iX", + "CyTOF XT", + "Cytek Aurora", + "Cytek Aurora Evo", + "Fluidigm BioMark", + "GEM-X Flex Gene Expression Chip", + "GEM-X OCM 5' Chip", + "Helios Mass Cytometer", + "Hyperion", + "Illumina HiSeq 2500", + "Illumina HiSeq X Ten", + "Illumina NextSeq 500", + "Illumina NovaSeq 6000", + "Illumina NovaSeq X", + "Not Applicable", + "Olink Signature Q100", + "Sony MA900", + "Thermo Fisher Attune CytPix", + "Thermo Fisher Attune NxT", + "Thermo Fisher Attune Xenith", + "Visium CytAssist", + "Xenium", + "none", + "unknown" + ], + "title": "platform" + }, + "SampleProcessingBatch": { + "description": "A label indicating batching of sample processing or preparation that occurs prior to data collection.", + "title": "sampleProcessingBatch", + "type": "string" + }, + "SoftwareAndVersion": { + "description": "Relevant software and version used to generate the data file.", + "enum": [ + "BD FACSDiva 8.0.1", "Cell Ranger 9.0.1", - "Space Ranger 3.1.0", "Cell Ranger ATAC v1.1.0", - "Cell Ranger v4.0.0", + "Cell Ranger v3.0.0", + "Cell Ranger v3.0.1", "Cell Ranger v3.0.2", - "Cell Ranger v9.0.0", - "Space Ranger 3.1.1", + "Cell Ranger v3.1.0", + "Cell Ranger v4.0.0", + "Cell Ranger v5.0.0", + "Cell Ranger v5.0.1", "Cell Ranger v6.0.0", + "Cell 
Ranger v6.0.1", + "Cell Ranger v6.0.2", + "Cell Ranger v6.1.0", + "Cell Ranger v6.1.1", + "Cell Ranger v6.1.2", "Cell Ranger v7.0.0", - "Cell Ranger v5.0.1", + "Cell Ranger v7.0.1", + "Cell Ranger v7.1.0", "Cell Ranger v7.2.0", - "Cell Ranger v3.0.1", - "Cell Ranger v5.0.0", - "BD FACSDiva 8.0.1", - "Space Ranger 3.1.2", - "demuxlet", - "Cell Ranger v3.1.0", - "Cell Ranger v6.1.0", - "Space Ranger 3.0.1", + "Cell Ranger v8.0.0", "Cell Ranger v8.0.1", - "Cell Ranger v6.0.1", - "Cell Ranger v7.1.0", + "Cell Ranger v9.0.0", "Space Ranger 3.0.0", - "Cell Ranger v7.0.1", - "Cell Ranger v6.1.1", - "Cell Ranger v6.1.2", + "Space Ranger 3.0.1", + "Space Ranger 3.1.0", + "Space Ranger 3.1.1", + "Space Ranger 3.1.2", "Space Ranger 3.1.3", - "Cell Ranger v8.0.0", - "" - ] - }, - "dataCollectionBatch": {}, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "targetPanelSynID": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "biospecimenID": { - "not": { - "type": "null" - }, - "minLength": 1 + "demuxlet" + ], + "title": "softwareAndVersion" }, - "sampleProcessingBatch": {}, - "platform": { - "type": "array", - "items": { - "enum": [ - "BD FACSAria Fusion cell sorter", - "unknown", - "GEM-X Flex Gene Expression Chip", - "BD FACSCanto", - "BD FACSMelody", - "BD FACSLyric Clinical", - "Illumina HiSeq X Ten", - "Illumina NextSeq 500", - "Cytek Aurora Evo", - "Chromium Next GEM Chip Q", - "Helios Mass Cytometer", - "BD FACSymphony S6", - "Chromium Controller", - "Chromium Next GEM Chip K", - "Fluidigm BioMark", - "GEM-X OCM 5' Chip", - "CyTOF XT", - "Cytek Aurora", - "Xenium", - "Visium CytAssist", - "Thermo Fisher Attune Xenith", - "BD FACSAria III", - "Chromium X", - "Chromium Next GEM Chip H", - "Illumina NovaSeq X", - "Sony MA900", - "Chromium Next GEM Chip G", - "Olink Signature Q100", - "BD LSRFortessa", - "BD FACSDiscover S8", - "Chromium Xo", - "Thermo Fisher Attune NxT", - "BD FACSCanto II", - "none", - "Not Applicable", - "Chromium GEM-X 
Single Cell 3' Chip v4", - "Illumina HiSeq 2500", - "Chromium Next GEM Chip M", - "BD FACSDiscover A8", - "Hyperion", - "Thermo Fisher Attune CytPix", - "Illumina NovaSeq 6000", - "Chromium iX" - ] - }, - "maxItems": 43 + "TargetPanel": { + "description": "A unique or established human-readable name assigned to the panel of targets profiled in the experiment. For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.", + "title": "targetPanel", + "type": "string" }, - "targetPanelSize": {}, - "assay": { - "type": "array", - "items": { - "enum": [ - "H&E", - "VDJSeq", - "imaging mass spectrometry", - "RNASeq", - "Olink Explore HT", - "Olink Target 96", - "Olink Flex", - "kiloplex", - "imaging mass cytometry", - "CosMX", - "snRNASeq", - "CE-MS", - "WES", - "SomaScan", - "Xenium", - "ASAPSeq", - "Olink Target 48", - "scVDJSeq", - "multiplexed ELISA", - "scRNASeq", - "snATACSeq", - "WGS", - "flow cytometry", - "CITESeq", - "NULISA", - "LC-MS/MS", - "Olink Focus", - "GenePS SeqFISH", - "Visium", - "serial IHC", - "Olink Reveal", - "feature barcode sequencing", - "SNP array", - "CyTOF" - ] - }, - "maxItems": 34 + "TargetPanelSize": { + "description": "The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).", + "title": "targetPanelSize", + "type": "integer" }, - "targetPanel": { - "not": { - "type": "null" - }, - "minLength": 1 + "TargetPanelSynID": { + "description": "In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. 
This attribute links experimental data files to the target panel metadata via the synapse ID of that file.", + "pattern": "^syn[0-9]{8}", + "title": "targetPanelSynID", + "type": "string" } }, "required": [ + "Assay", + "BiospecimenID", "Component", - "targetPanelSynID", - "biospecimenID", - "platform", - "assay", - "targetPanel" - ] + "Platform", + "TargetPanel", + "TargetPanelSynID" + ], + "title": "CyTOFAssayMetadataTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.DatasetAnnotationTemplate.schema.json b/model_json_schema/ark.DatasetAnnotationTemplate.schema.json index 12fa0e98..418a0002 100644 --- a/model_json_schema/ark.DatasetAnnotationTemplate.schema.json +++ b/model_json_schema/ark.DatasetAnnotationTemplate.schema.json @@ -1,299 +1,312 @@ { + "$id": "http://example.com/DatasetAnnotationTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", + "allOf": [ + { + "if": { + "properties": { + "Program": { + "enum": [ + "AMPRA/SLE" + ] + } + } + }, + "then": { + "properties": { + "ProgramPhase": { + "not": { + "type": "null" + } + } + }, + "required": [ + "ProgramPhase" + ] + } + } + ], + "description": "A template outlining dataset metadata to use as annotations for a synapse dataset entity.", "properties": { - "acknowledgmentStatement": { + "ARKRelease": { + "description": "A Dataset-specific attribute specifying the ARK Portal release in which this dataset was first made available to the public.", "enum": [ - "syn26710600/wiki/619685" - ] + "1.0", + "2.0", + "2024.06.R1", + "2024.07.R1", + "2024.08.R1", + "2024.09.R1", + "2024.10.R1", + "2024.12.R1", + "2025.01.R1", + "2025.02.R1", + "2025.03.R1", + "2025.04.R1", + "2025.05.R1", + "2025.06.R1", + "2025.07.R1", + "2025.08.R1", + "2025.09.R1", + "2025.10.R1", + "2025.11.R1", + "2025.12.R1" + ], + "title": "ARKRelease" }, - "diagnosis": { - "type": "array", - "items": { - "enum": [ - 
"dermatomyositis", - "lupus nephritis", - "psoriatic arthritis", - "control", - "Sjogren's disease", - "unknown", - "cutaneous lupus erythematosus", - "vitiligo", - "SLE", - "scleroderma", - "At-Risk RA", - "psoriasis", - "OA", - "discoid lupus erythematosus", - "RA", - "Not Applicable" - ] - }, - "maxItems": 16 + "AcknowledgmentStatement": { + "description": "A Dataset-specific attribute specifying the path to the wiki subpage within the ARK Portal - backend project that contains the acknowledgement statement that must be included in publications using data from the given dataset as a stipulation of the conditions of use.", + "enum": [ + "syn26710600/wiki/619685" + ], + "title": "acknowledgmentStatement" }, - "species": { + "Assay": { + "description": "The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.", "enum": [ - "Homo sapiens" - ] + "ASAPSeq", + "CE-MS", + "CITESeq", + "CosMX", + "CyTOF", + "GenePS SeqFISH", + "H&E", + "LC-MS/MS", + "NULISA", + "Olink Explore HT", + "Olink Flex", + "Olink Focus", + "Olink Reveal", + "Olink Target 48", + "Olink Target 96", + "RNASeq", + "SNP array", + "SomaScan", + "VDJSeq", + "Visium", + "WES", + "WGS", + "Xenium", + "feature barcode sequencing", + "flow cytometry", + "imaging mass cytometry", + "imaging mass spectrometry", + "kiloplex", + "multiplexed ELISA", + "scRNASeq", + "scVDJSeq", + "serial IHC", + "snATACSeq", + "snRNASeq" + ], + "title": "assay" }, - "associatedCodeURL": {}, - "associatedDataset": {}, - "dataSubtype": { - "type": "array", - "items": { - "enum": [ - "single-cell", - "pseudobulk", - "single-nucleus", - "none", - "bulk", - "spatial" - ] - }, - "maxItems": 6 + "AssociatedAccession": { + "description": "This is a File and Dataset annotation attribute indicating additional accessions (i.e., unique identifiers) 
associated with the data when the data has also been submitted to or can be found in other repositories such as GEO, SRA, dbGaP, etc.", + "title": "associatedAccession", + "type": "string" }, - "project": { - "type": "array", - "items": { - "enum": [ - "AIM for RA", - "ELLIPSS", - "LOCKIT", - "STAMP", - "SLE", - "UMass V-CoRT", - "RA" - ] - }, - "maxItems": 7 + "AssociatedCodeURL": { + "description": "A URL to the repository where associated code is available.", + "title": "associatedCodeURL", + "type": "string" }, - "dataType": { - "type": "array", - "items": { - "enum": [ - "immunostaining", - "microbiome", - "immune repertoire profiling", - "metabolomics", - "multimodal", - "proteomics", - "transcriptomics", - "histology", - "epigenomics", - "cytometry", - "lipidomics", - "genomics" - ] - }, - "maxItems": 12 + "AssociatedDataset": { + "description": "The synID of a Dataset entity. This serves to link other Synapse entities to Dataset entities. When used to annotate a publication Dataset this attribute should include the synID for an experimental Datasets from which the publication data was derived. 
Multiple synID can be specified using a comma-delimited list.", + "pattern": "^syn[0-9]{8}", + "title": "associatedDataset", + "type": "string" }, - "assay": { - "type": "array", - "items": { - "enum": [ - "Olink Target 48", - "CITESeq", - "flow cytometry", - "LC-MS/MS", - "multiplexed ELISA", - "VDJSeq", - "serial IHC", - "Olink Explore HT", - "Visium", - "GenePS SeqFISH", - "ASAPSeq", - "SomaScan", - "Xenium", - "Olink Flex", - "Olink Target 96", - "Olink Focus", - "CyTOF", - "NULISA", - "feature barcode sequencing", - "imaging mass spectrometry", - "snRNASeq", - "CosMX", - "kiloplex", - "RNASeq", - "scRNASeq", - "WES", - "snATACSeq", - "Olink Reveal", - "CE-MS", - "imaging mass cytometry", - "WGS", - "SNP array", - "H&E", - "scVDJSeq" - ] - }, - "maxItems": 34 + "BiospecimenSubtype": { + "description": "Biospecimen status before sample is processed into a scRNA-seq library. Several scRNA-seq technologies support a variety of sample processing methods which can introduces sources of technical variation.", + "enum": [ + "FFPE tissue", + "PFA-fixed tissue", + "cell or tissue lysate", + "cell suspension", + "flow-sorted cells", + "fresh tissue", + "frozen tissue", + "nuclei suspension", + "supernatant" + ], + "title": "biospecimenSubtype" }, - "ImmPortAccession": {}, - "biospecimenType": { - "type": "array", - "items": { - "enum": [ - "serum", - "suction blister fluid", - "PBMCs", - "uvea", - "skin biopsy", - "plasma", - "fibroblast-like synoviocyte", - "urine", - "whole blood", - "kidney biopsy", - "synovial tissue", - "total leukocytes", - "stool", - "skin swab", - "saliva", - "none", - "salivary gland", - "primary cell culture", - "suction blister cells", - "synovial fluid", - "cell line" - ] - }, - "maxItems": 21 + "BiospecimenType": { + "description": "A label indicating the biological material collected for experimentation and data collection. 
Where applicable, provide all types in a comma-separated list.", + "enum": [ + "PBMCs", + "cell line", + "fibroblast-like synoviocyte", + "kidney biopsy", + "none", + "plasma", + "primary cell culture", + "saliva", + "salivary gland", + "serum", + "skin biopsy", + "skin swab", + "stool", + "suction blister cells", + "suction blister fluid", + "synovial fluid", + "synovial tissue", + "total leukocytes", + "urine", + "uvea", + "whole blood" + ], + "title": "biospecimenType" }, - "publicationSynID": {}, - "ARKRelease": { + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "DataSubtype": { + "description": "General classification to differentiate between omics profiling modalities. If N/A please select 'none'. Multiple selections can be provided in a comma-delimited list, however this is largely only expected in the context of Datasets and files that contain integrated experimental data spanning multiple types.", "enum": [ - "2025.02.R1", - "2025.05.R1", - "2024.07.R1", - "2024.12.R1", - "2025.07.R1", - "2024.10.R1", - "1.0", - "2025.10.R1", - "2025.04.R1", - "2025.09.R1", - "2.0", - "2024.09.R1", - "2025.03.R1", - "2025.01.R1", - "2024.08.R1", - "2025.12.R1", - "2025.08.R1", - "2024.06.R1", - "2025.06.R1", - "2025.11.R1" - ] + "bulk", + "none", + "pseudobulk", + "single-cell", + "single-nucleus", + "spatial" + ], + "title": "dataSubtype" }, - "datasetStatus": { + "DataType": { + "description": "High-level classification of the type of data contained in the file, loosely related to the experimental method or biological entity that is being profiled. Select all that apply using a comma-delimited list, though in most cases only a single label is expected. 
For multimodal datasets with concomitant profiling of biospecimen include 'multimodal'.", "enum": [ - "test", - "deprecated", - "unreleased", - "released", - "under peer review" - ] + "cytometry", + "epigenomics", + "genomics", + "histology", + "immune repertoire profiling", + "immunostaining", + "lipidomics", + "metabolomics", + "microbiome", + "multimodal", + "proteomics", + "transcriptomics" + ], + "title": "dataType" }, - "datasetDescription": { - "not": { - "type": "null" - }, - "minLength": 1 + "DatasetDescription": { + "description": "A Dataset-specific attribute specifying the synID of the folder that contains a wiki write-up of the dataset description. This wiki content will be surfaced on the ARK Portal frontend site.", + "pattern": "^syn[0-9]{8}", + "title": "datasetDescription", + "type": "string" }, - "associatedAccession": {}, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 + "DatasetStatus": { + "description": "A categorical label indicating the status of an ARK Portal dataset. 
This is applied to improve downstream management of datasets as well as various ETL workflows.", + "enum": [ + "deprecated", + "released", + "test", + "under peer review", + "unreleased" + ], + "title": "datasetStatus" }, - "datasetType": { + "DatasetType": { + "description": "High-level classification of dataset entity distinguishing between datasets compiled for a specific publication or as a general data resource.", "enum": [ "experimental", "publication" - ] + ], + "title": "datasetType" + }, + "Diagnosis": { + "description": "A high-level classifier indicating the disease status of an individual.", + "enum": [ + "At-Risk RA", + "Not Applicable", + "OA", + "RA", + "SLE", + "Sjogren's disease", + "control", + "cutaneous lupus erythematosus", + "dermatomyositis", + "discoid lupus erythematosus", + "lupus nephritis", + "psoriasis", + "psoriatic arthritis", + "scleroderma", + "unknown", + "vitiligo" + ], + "title": "diagnosis" + }, + "ImmPortAccession": { + "description": "Accession to corresponding information in ImmPort.", + "pattern": "^SDY", + "title": "ImmPortAccession", + "type": "string" }, - "program": { + "Program": { + "description": "Name of the funding program that supported the generation of data and associated files", "enum": [ - "Community Contribution", "AMP AIM", - "AMP RA/SLE" - ] + "AMP RA/SLE", + "Community Contribution" + ], + "title": "program" }, - "biospecimenSubtype": { + "ProgramPhase": { + "description": "A label noting which AMP RA/SLE program phase generated the data.", "enum": [ - "supernatant", - "frozen tissue", - "PFA-fixed tissue", - "FFPE tissue", - "flow-sorted cells", - "cell or tissue lysate", - "fresh tissue", - "nuclei suspension", - "cell suspension", - "" - ] + "I", + "II" + ], + "title": "programPhase" }, - "programPhase": { - "type": "array", - "items": { - "enum": [ - "I", - "II", - "" - ] - }, - "maxItems": 2 + "Project": { + "description": "A sub-level attribute of `program` specifying a research initiative working to 
investigate particular hypotheses.", + "enum": [ + "AIM for RA", + "ELLIPSS", + "LOCKIT", + "RA", + "SLE", + "STAMP", + "UMass V-CoRT" + ], + "title": "project" + }, + "PublicationSynID": { + "description": "The synID of the corresponding Synapse entity that stores metadata about the publication. This is used to differentiate publication-specific files, often consisting of level 4 processed data and expanded subject metadata, in a publication dataset that also includes raw or minimally processed files from experimental datasets. This provides an easy way to distinguish and select for the publication-specific data from which the research findings were derived. When this attribute is used to annotate a Dataset it serves as a way to directly link the Dataset entity with the publication metadata stored in Synapse.", + "pattern": "^syn[0-9]{8}", + "title": "publicationSynID", + "type": "string" + }, + "Species": { + "description": "The genus species of sample or subject origin.", + "enum": [ + "Homo sapiens" + ], + "title": "species" } }, "required": [ - "acknowledgmentStatement", - "diagnosis", - "species", - "dataSubtype", - "project", - "dataType", - "assay", - "biospecimenType", "ARKRelease", - "datasetStatus", - "datasetDescription", + "AcknowledgmentStatement", + "Assay", + "BiospecimenType", "Component", - "datasetType", - "program" + "DataSubtype", + "DataType", + "DatasetDescription", + "DatasetStatus", + "DatasetType", + "Diagnosis", + "Program", + "Project", + "Species" ], - "allOf": [ - { - "if": { - "properties": { - "program": { - "enum": [ - "AMP RA/SLE" - ] - } - }, - "required": [ - "program" - ] - }, - "then": { - "properties": { - "programPhase": { - "type": "array", - "items": { - "enum": [ - "I", - "II" - ] - }, - "maxItems": 2 - } - }, - "required": [ - "programPhase" - ] - } - } - ] + "title": "DatasetAnnotationTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.FCSFileAnnotationTemplate.schema.json 
b/model_json_schema/ark.FCSFileAnnotationTemplate.schema.json index 66351b4d..82cccc9d 100644 --- a/model_json_schema/ark.FCSFileAnnotationTemplate.schema.json +++ b/model_json_schema/ark.FCSFileAnnotationTemplate.schema.json @@ -1,143 +1,162 @@ { + "$id": "http://example.com/FCSFileAnnotationTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", - "properties": { - "fileFormat": { - "enum": [ - "fcs", - "" - ] - }, - "eventCount": {}, - "specimenModality": { - "enum": [ - "multispecimen", - "single specimen", - "unknown" - ] - }, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "assay": { - "enum": [ - "CyTOF", - "flow cytometry" - ] - }, - "dataCollectionBatch": {}, - "sampleProcessingBatch": {}, - "individualID": {}, - "biospecimenID": {} - }, - "required": [ - "specimenModality", - "Component", - "assay" - ], "allOf": [ { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "multispecimen" + "Multispecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "dataCollectionBatch": {} + "DataCollectionBatch": { + "not": { + "type": "null" + } + } }, "required": [ - "dataCollectionBatch" + "DataCollectionBatch" ] } }, { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "multispecimen" + "Multispecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "sampleProcessingBatch": {} + "SampleProcessingBatch": { + "not": { + "type": "null" + } + } }, "required": [ - "sampleProcessingBatch" + "SampleProcessingBatch" ] } }, { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "single specimen" + "Singlespecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "individualID": { + "BiospecimenID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ 
- "individualID" + "BiospecimenID" ] } }, { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "single specimen" + "Singlespecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "biospecimenID": { + "IndividualID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "biospecimenID" + "IndividualID" ] } } - ] + ], + "description": "A template outlining metadata to be collected and applied to FCS file entities as annotations in Synapse.", + "properties": { + "Assay": { + "description": "The technology used to generate the data in this file.", + "enum": [ + "CyTOF", + "flow cytometry" + ], + "title": "assay" + }, + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.", + "title": "biospecimenID", + "type": "string" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "DataCollectionBatch": { + "description": "A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.", + "title": "dataCollectionBatch", + "type": "string" + }, + "EventCount": { + "description": "The total number of events detected and captured in the corresponding FCS file. In cytometry, an event corresponds to particles detected and measured by the instrument. This number corresponds to the number of rows in the FCS file and is expected to be a whole integer number.", + "title": "eventCount", + "type": "integer" + }, + "FileFormat": { + "description": "Standard file format name or file extension", + "enum": [ + "fcs" + ], + "title": "fileFormat" + }, + "IndividualID": { + "description": "Unique identifier assigned to each study participant. 
For multi-specimen data files provide all IDs in a comma-separated list.", + "title": "individualID", + "type": "string" + }, + "SampleProcessingBatch": { + "description": "A label indicating batching of sample processing or preparation that occurs prior to data collection.", + "title": "sampleProcessingBatch", + "type": "string" + }, + "SpecimenModality": { + "description": "Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens", + "enum": [ + "multispecimen", + "single specimen", + "unknown" + ], + "title": "specimenModality" + } + }, + "required": [ + "Assay", + "Component", + "SpecimenModality" + ], + "title": "FCSFileAnnotationTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.FastqFileAnnotationTemplate.schema.json b/model_json_schema/ark.FastqFileAnnotationTemplate.schema.json index 6c3e24cc..260761c8 100644 --- a/model_json_schema/ark.FastqFileAnnotationTemplate.schema.json +++ b/model_json_schema/ark.FastqFileAnnotationTemplate.schema.json @@ -1,217 +1,229 @@ { + "$id": "http://example.com/FastqFileAnnotationTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", - "properties": { - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "fileFormat": { - "enum": [ - "fastq" - ] - }, - "specimenModality": { - "enum": [ - "multispecimen", - "unknown", - "single specimen" - ] - }, - "readLength": {}, - "assay": { - "type": "array", - "items": { - "enum": [ - "RNASeq", - "ASAPSeq", - "feature barcode sequencing", - "snRNASeq", - "scVDJSeq", - "WGS", - "VDJSeq", - "scRNASeq", - "snATACSeq", - "WES", - "CITESeq" - ] - }, - "maxItems": 11 - }, - "libraryID": {}, - "biospecimenID": {}, - "individualID": {}, - "targetPanel": {}, - "targetPanelSynID": {}, - "targetPanelSize": {} - }, - "required": [ - "Component", - "fileFormat", - "specimenModality", 
- "assay" - ], "allOf": [ { "if": { "properties": { - "specimenModality": { + "Assay": { "enum": [ - "multispecimen" + "Featurebarcodesequencing" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "libraryID": { + "TargetPanel": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "libraryID" + "TargetPanel" ] } }, { "if": { "properties": { - "specimenModality": { + "Assay": { "enum": [ - "single specimen" + "Featurebarcodesequencing" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "biospecimenID": { + "TargetPanelSize": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "biospecimenID" + "TargetPanelSize" ] } }, { "if": { "properties": { - "specimenModality": { + "Assay": { "enum": [ - "single specimen" + "Featurebarcodesequencing" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "individualID": { + "TargetPanelSynID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "individualID" + "TargetPanelSynID" ] } }, { "if": { "properties": { - "assay": { + "SpecimenModality": { "enum": [ - "feature barcode sequencing" + "Multispecimen" ] } - }, - "required": [ - "assay" - ] + } }, "then": { "properties": { - "targetPanel": { + "LibraryID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "targetPanel" + "LibraryID" ] } }, { "if": { "properties": { - "assay": { + "SpecimenModality": { "enum": [ - "feature barcode sequencing" + "Singlespecimen" ] } - }, - "required": [ - "assay" - ] + } }, "then": { "properties": { - "targetPanelSynID": { + "BiospecimenID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "targetPanelSynID" + "BiospecimenID" ] } }, { "if": { "properties": { - "assay": { + "SpecimenModality": { "enum": [ - "feature barcode sequencing" + "Singlespecimen" ] } - }, - "required": [ - "assay" - ] + } }, "then": { "properties": { - 
"targetPanelSize": {} + "IndividualID": { + "not": { + "type": "null" + } + } }, "required": [ - "targetPanelSize" + "IndividualID" ] } } - ] + ], + "description": "A template outlining metadata to be collected and applied to fastq file entities as annotations in Synapse.", + "properties": { + "Assay": { + "description": "The technology used to generate the data in this file. Select all assays that apply. e.g., the GEX fastq files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.", + "enum": [ + "ASAPSeq", + "CITESeq", + "RNASeq", + "VDJSeq", + "WES", + "WGS", + "feature barcode sequencing", + "scRNASeq", + "scVDJSeq", + "snATACSeq", + "snRNASeq" + ], + "title": "assay" + }, + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.", + "title": "biospecimenID", + "type": "string" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "FileFormat": { + "description": "Standard file format name or file extension", + "enum": [ + "fastq" + ], + "title": "fileFormat" + }, + "IndividualID": { + "description": "Unique identifier assigned to each study participant. 
For multi-specimen data files provide all IDs in a comma-separated list.", + "title": "individualID", + "type": "string" + }, + "LibraryID": { + "description": "A library label or name, unique within an experiment, used to distinguish sequencing libraries.", + "title": "libraryID", + "type": "string" + }, + "ReadLength": { + "description": "The number of base pairs (bp) sequenced for reads in a fastq file.", + "title": "readLength", + "type": "integer" + }, + "SpecimenModality": { + "description": "Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens", + "enum": [ + "multispecimen", + "single specimen", + "unknown" + ], + "title": "specimenModality" + }, + "TargetPanel": { + "description": "A unique or established human-readable name assigned to the panel of targets profiled in the experiment. For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.", + "title": "targetPanel", + "type": "string" + }, + "TargetPanelSize": { + "description": "The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).", + "title": "targetPanelSize", + "type": "integer" + }, + "TargetPanelSynID": { + "description": "In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. 
This attribute links experimental data files to the target panel metadata via the synapse ID of that file.", + "pattern": "^syn[0-9]{8}", + "title": "targetPanelSynID", + "type": "string" + } + }, + "required": [ + "Assay", + "Component", + "FileFormat", + "SpecimenModality" + ], + "title": "FastqFileAnnotationTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.InVitroBiospecimenMetadataTemplate.schema.json b/model_json_schema/ark.InVitroBiospecimenMetadataTemplate.schema.json index 0fe1770c..2461c524 100644 --- a/model_json_schema/ark.InVitroBiospecimenMetadataTemplate.schema.json +++ b/model_json_schema/ark.InVitroBiospecimenMetadataTemplate.schema.json @@ -1,1194 +1,1090 @@ { + "$id": "http://example.com/InVitroBiospecimenMetadataTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", - "properties": { - "program": { - "enum": [ - "AMP RA/SLE", - "AMP AIM", - "Community Contribution" - ] - }, - "biospecimenID": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "notes": {}, - "altSampleID": {}, - "biospecimenType": { - "enum": [ - "fibroblast-like synoviocyte", - "primary cell culture", - "suction blister cells", - "synovial fluid", - "urine", - "salivary gland", - "none", - "skin swab", - "saliva", - "serum", - "PBMCs", - "stool", - "kidney biopsy", - "total leukocytes", - "whole blood", - "cell line", - "skin biopsy", - "synovial tissue", - "plasma", - "suction blister fluid", - "uvea" - ] - }, - "individualID": {}, - "parentBiospecimenID": {}, - "biospecimenSubtype": { - "enum": [ - "FFPE tissue", - "flow-sorted cells", - "cell suspension", - "frozen tissue", - "fresh tissue", - "nuclei suspension", - "supernatant", - "PFA-fixed tissue", - "cell or tissue lysate", - "" - ] - }, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "project": { - "type": "array", - "items": { - "enum": [ - "LOCKIT", - "RA", - 
"ELLIPSS", - "AIM for RA", - "SLE", - "STAMP", - "UMass V-CoRT" - ] - }, - "maxItems": 7 - }, - "treatmentTimepoint": {}, - "sampleCollectionBatch": {}, - "treatment": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "visitID": {}, - "primaryCellSource": { - "enum": [ - "synovial tissue", - "pannus-derived epidermis", - "PBMCs", - "pannus-derived dermis", - "total leukocytes", - "urine", - "whole blood", - "salivary gland", - "uvea", - "kidney", - "" - ] - }, - "cellType": {}, - "cellOntologyID": {}, - "anatomicalSite": { - "enum": [ - "right knee joint", - "left wrist joint", - "left knee joint", - "left ankle joint", - "right 2nd MTP joint", - "right wrist joint", - "left hip joint", - "unknown", - "left 2nd MCP joint", - "right 2nd MCP joint", - "other site", - "right ankle joint", - "right hip joint", - "right 3rd MCP joint", - "right 1st MTP joint", - "" - ] - }, - "skinSiteStatus": { - "enum": [ - "non-lesional", - "lesional", - "healthy control", - "lesional proximal", - "" - ] - }, - "synovialCollectionProcedure": { - "enum": [ - "synovectomy", - "biopsy", - "unknown", - "arthroplasty", - "" - ] - }, - "salivaCollectionProcedure": { - "enum": [ - "stimulated", - "unstimulated", - "" - ] - }, - "krennLining": {}, - "krennInflammatory": {}, - "krennSynovitisScore": {}, - "krennStroma": {}, - "FACSPopulation": {}, - "userDefinedCellType": {} - }, - "required": [ - "program", - "biospecimenID", - "biospecimenType", - "Component", - "project", - "treatment" - ], "allOf": [ { "if": { "properties": { - "program": { + "BiospecimenSubtype": { "enum": [ - "AMP RA/SLE" + "Cellsuspension" ] } - }, - "required": [ - "program" - ] + } }, "then": { "properties": { - "visitID": { + "CellOntologyID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "visitID" + "CellOntologyID" ] } }, { "if": { "properties": { - "program": { + "BiospecimenSubtype": { "enum": [ - "AMP AIM" + "Flow-sortedcells" ] } - }, - "required": [ - "program" - ] + } }, 
"then": { "properties": { - "visitID": { + "CellOntologyID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "visitID" + "CellOntologyID" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "primary cell culture" + "Cellline" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "primaryCellSource": { - "enum": [ - "synovial tissue", - "pannus-derived epidermis", - "PBMCs", - "pannus-derived dermis", - "total leukocytes", - "urine", - "whole blood", - "salivary gland", - "uvea", - "kidney" - ] + "CellOntologyID": { + "not": { + "type": "null" + } } }, "required": [ - "primaryCellSource" + "CellOntologyID" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "primary cell culture" + "Primarycellculture" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "cellType": {} + "CellOntologyID": { + "not": { + "type": "null" + } + } }, "required": [ - "cellType" + "CellOntologyID" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenSubtype": { "enum": [ - "cell line" + "Cellsuspension" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "cellType": {} + "CellType": { + "not": { + "type": "null" + } + } }, "required": [ - "cellType" + "CellType" ] } }, { "if": { "properties": { - "biospecimenSubtype": { + "BiospecimenSubtype": { "enum": [ - "flow-sorted cells" + "Flow-sortedcells" ] } - }, - "required": [ - "biospecimenSubtype" - ] + } }, "then": { "properties": { - "cellType": {} + "CellType": { + "not": { + "type": "null" + } + } }, "required": [ - "cellType" + "CellType" ] } }, { "if": { "properties": { - "biospecimenSubtype": { + "BiospecimenType": { "enum": [ - "cell suspension" + "Cellline" ] } - }, - "required": [ - "biospecimenSubtype" - ] + } }, "then": { "properties": { - "cellType": {} + "CellType": { + "not": { + "type": "null" + } + } }, "required": [ 
- "cellType" + "CellType" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "primary cell culture" + "Primarycellculture" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "cellOntologyID": {} + "CellType": { + "not": { + "type": "null" + } + } }, "required": [ - "cellOntologyID" + "CellType" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenSubtype": { "enum": [ - "cell line" + "Cellsuspension" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "cellOntologyID": {} + "UserDefinedCellType": { + "not": { + "type": "null" + } + } }, "required": [ - "cellOntologyID" + "UserDefinedCellType" ] } }, { "if": { "properties": { - "biospecimenSubtype": { + "BiospecimenSubtype": { "enum": [ - "flow-sorted cells" + "Flow-sortedcells" ] } - }, - "required": [ - "biospecimenSubtype" - ] + } }, "then": { "properties": { - "cellOntologyID": {} + "UserDefinedCellType": { + "not": { + "type": "null" + } + } }, "required": [ - "cellOntologyID" + "UserDefinedCellType" ] } }, { "if": { "properties": { - "biospecimenSubtype": { + "BiospecimenSubtype": { "enum": [ - "cell suspension" + "Flow-sortedcells" ] } - }, - "required": [ - "biospecimenSubtype" - ] + } }, "then": { "properties": { - "cellOntologyID": {} + "FACSPopulation": { + "not": { + "type": "null" + } + } }, "required": [ - "cellOntologyID" + "FACSPopulation" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "suction blister cells" + "Primarycellculture" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "anatomicalSite": { - "enum": [ - "right knee joint", - "left wrist joint", - "left knee joint", - "left ankle joint", - "right 2nd MTP joint", - "right wrist joint", - "left hip joint", - "unknown", - "left 2nd MCP joint", - "right 2nd MCP joint", - "other site", - "right ankle joint", - "right hip joint", - "right 3rd MCP 
joint", - "right 1st MTP joint" - ] + "PrimaryCellSource": { + "not": { + "type": "null" + } } }, "required": [ - "anatomicalSite" + "PrimaryCellSource" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "synovial fluid" + "Saliva" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "anatomicalSite": { - "enum": [ - "right knee joint", - "left wrist joint", - "left knee joint", - "left ankle joint", - "right 2nd MTP joint", - "right wrist joint", - "left hip joint", - "unknown", - "left 2nd MCP joint", - "right 2nd MCP joint", - "other site", - "right ankle joint", - "right hip joint", - "right 3rd MCP joint", - "right 1st MTP joint" - ] + "SalivaCollectionProcedure": { + "not": { + "type": "null" + } } }, "required": [ - "anatomicalSite" + "SalivaCollectionProcedure" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "skin swab" + "Skinbiopsy" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "anatomicalSite": { - "enum": [ - "right knee joint", - "left wrist joint", - "left knee joint", - "left ankle joint", - "right 2nd MTP joint", - "right wrist joint", - "left hip joint", - "unknown", - "left 2nd MCP joint", - "right 2nd MCP joint", - "other site", - "right ankle joint", - "right hip joint", - "right 3rd MCP joint", - "right 1st MTP joint" - ] + "AnatomicalSite": { + "not": { + "type": "null" + } } }, "required": [ - "anatomicalSite" + "AnatomicalSite" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "skin biopsy" + "Skinswab" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "anatomicalSite": { - "enum": [ - "right knee joint", - "left wrist joint", - "left knee joint", - "left ankle joint", - "right 2nd MTP joint", - "right wrist joint", - "left hip joint", - "unknown", - "left 2nd MCP joint", - "right 2nd MCP joint", - "other site", - 
"right ankle joint", - "right hip joint", - "right 3rd MCP joint", - "right 1st MTP joint" - ] + "AnatomicalSite": { + "not": { + "type": "null" + } } }, "required": [ - "anatomicalSite" + "AnatomicalSite" ] } }, { "if": { "properties": { - "primaryCellSource": { + "BiospecimenType": { "enum": [ - "synovial tissue" + "Suctionblistercells" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "anatomicalSite": { - "enum": [ - "right knee joint", - "left wrist joint", - "left knee joint", - "left ankle joint", - "right 2nd MTP joint", - "right wrist joint", - "left hip joint", - "unknown", - "left 2nd MCP joint", - "right 2nd MCP joint", - "other site", - "right ankle joint", - "right hip joint", - "right 3rd MCP joint", - "right 1st MTP joint" - ] + "AnatomicalSite": { + "not": { + "type": "null" + } } }, "required": [ - "anatomicalSite" + "AnatomicalSite" ] } }, { "if": { "properties": { - "primaryCellSource": { + "BiospecimenType": { "enum": [ - "synovial tissue" + "Suctionblisterfluid" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "anatomicalSite": { - "enum": [ - "right knee joint", - "left wrist joint", - "left knee joint", - "left ankle joint", - "right 2nd MTP joint", - "right wrist joint", - "left hip joint", - "unknown", - "left 2nd MCP joint", - "right 2nd MCP joint", - "other site", - "right ankle joint", - "right hip joint", - "right 3rd MCP joint", - "right 1st MTP joint" - ] + "AnatomicalSite": { + "not": { + "type": "null" + } } }, "required": [ - "anatomicalSite" + "AnatomicalSite" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "suction blister fluid" + "Synovialfluid" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "anatomicalSite": { - "enum": [ - "right knee joint", - "left wrist joint", - "left knee joint", - "left ankle joint", - "right 2nd MTP joint", - "right wrist joint", - "left hip joint", - 
"unknown", - "left 2nd MCP joint", - "right 2nd MCP joint", - "other site", - "right ankle joint", - "right hip joint", - "right 3rd MCP joint", - "right 1st MTP joint" - ] + "AnatomicalSite": { + "not": { + "type": "null" + } } }, "required": [ - "anatomicalSite" + "AnatomicalSite" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "suction blister cells" + "Synovialtissue" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "skinSiteStatus": { - "enum": [ - "non-lesional", - "lesional", - "healthy control", - "lesional proximal" - ] + "AnatomicalSite": { + "not": { + "type": "null" + } } }, "required": [ - "skinSiteStatus" + "AnatomicalSite" ] } }, { "if": { "properties": { - "biospecimenType": { + "PrimaryCellSource": { "enum": [ - "skin swab" + "Synovialtissue" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "skinSiteStatus": { - "enum": [ - "non-lesional", - "lesional", - "healthy control", - "lesional proximal" - ] + "AnatomicalSite": { + "not": { + "type": "null" + } } }, "required": [ - "skinSiteStatus" + "AnatomicalSite" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "skin biopsy" + "Skinbiopsy" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "skinSiteStatus": { - "enum": [ - "non-lesional", - "lesional", - "healthy control", - "lesional proximal" - ] + "SkinSiteStatus": { + "not": { + "type": "null" + } } }, "required": [ - "skinSiteStatus" + "SkinSiteStatus" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "suction blister fluid" + "Skinswab" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "skinSiteStatus": { - "enum": [ - "non-lesional", - "lesional", - "healthy control", - "lesional proximal" - ] + "SkinSiteStatus": { + "not": { + "type": "null" + } } }, "required": [ - "skinSiteStatus" + 
"SkinSiteStatus" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "synovial fluid" + "Suctionblistercells" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "synovialCollectionProcedure": { - "enum": [ - "synovectomy", - "biopsy", - "unknown", - "arthroplasty" - ] + "SkinSiteStatus": { + "not": { + "type": "null" + } } }, "required": [ - "synovialCollectionProcedure" + "SkinSiteStatus" ] } }, { "if": { "properties": { - "primaryCellSource": { + "BiospecimenType": { "enum": [ - "synovial tissue" + "Suctionblisterfluid" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "synovialCollectionProcedure": { - "enum": [ - "synovectomy", - "biopsy", - "unknown", - "arthroplasty" - ] + "SkinSiteStatus": { + "not": { + "type": "null" + } } }, "required": [ - "synovialCollectionProcedure" + "SkinSiteStatus" ] } }, { "if": { "properties": { - "primaryCellSource": { + "BiospecimenType": { "enum": [ - "synovial tissue" + "Synovialfluid" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "synovialCollectionProcedure": { - "enum": [ - "synovectomy", - "biopsy", - "unknown", - "arthroplasty" - ] + "SynovialCollectionProcedure": { + "not": { + "type": "null" + } } }, "required": [ - "synovialCollectionProcedure" + "SynovialCollectionProcedure" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "saliva" + "Synovialtissue" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "salivaCollectionProcedure": { - "enum": [ - "stimulated", - "unstimulated" - ] + "SynovialCollectionProcedure": { + "not": { + "type": "null" + } } }, "required": [ - "salivaCollectionProcedure" + "SynovialCollectionProcedure" ] } }, { "if": { "properties": { - "primaryCellSource": { + "PrimaryCellSource": { "enum": [ - "synovial tissue" + "Synovialtissue" ] } - }, - "required": [ - 
"primaryCellSource" - ] + } }, "then": { "properties": { - "krennLining": {} + "SynovialCollectionProcedure": { + "not": { + "type": "null" + } + } }, "required": [ - "krennLining" + "SynovialCollectionProcedure" ] } }, { "if": { "properties": { - "primaryCellSource": { + "BiospecimenType": { "enum": [ - "synovial tissue" + "Synovialtissue" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "krennLining": {} + "KrennInflammatory": { + "not": { + "type": "null" + } + } }, "required": [ - "krennLining" + "KrennInflammatory" ] } }, { "if": { "properties": { - "primaryCellSource": { + "PrimaryCellSource": { "enum": [ - "synovial tissue" + "Synovialtissue" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "krennInflammatory": {} + "KrennInflammatory": { + "not": { + "type": "null" + } + } }, "required": [ - "krennInflammatory" + "KrennInflammatory" ] } }, { "if": { "properties": { - "primaryCellSource": { + "BiospecimenType": { "enum": [ - "synovial tissue" + "Synovialtissue" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "krennInflammatory": {} + "KrennLining": { + "not": { + "type": "null" + } + } }, "required": [ - "krennInflammatory" + "KrennLining" ] } }, { "if": { "properties": { - "primaryCellSource": { + "PrimaryCellSource": { "enum": [ - "synovial tissue" + "Synovialtissue" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "krennSynovitisScore": {} + "KrennLining": { + "not": { + "type": "null" + } + } }, "required": [ - "krennSynovitisScore" + "KrennLining" ] } }, { "if": { "properties": { - "primaryCellSource": { + "BiospecimenType": { "enum": [ - "synovial tissue" + "Synovialtissue" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "krennSynovitisScore": {} + "KrennStroma": { + "not": { + "type": "null" + } + } }, "required": [ - "krennSynovitisScore" + "KrennStroma" ] } }, { 
"if": { "properties": { - "primaryCellSource": { + "PrimaryCellSource": { "enum": [ - "synovial tissue" + "Synovialtissue" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "krennStroma": {} + "KrennStroma": { + "not": { + "type": "null" + } + } }, "required": [ - "krennStroma" + "KrennStroma" ] } }, { "if": { "properties": { - "primaryCellSource": { + "BiospecimenType": { "enum": [ - "synovial tissue" + "Synovialtissue" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "krennStroma": {} + "KrennSynovitisScore": { + "not": { + "type": "null" + } + } }, "required": [ - "krennStroma" + "KrennSynovitisScore" ] } }, { "if": { "properties": { - "biospecimenSubtype": { + "PrimaryCellSource": { "enum": [ - "flow-sorted cells" + "Synovialtissue" ] } - }, - "required": [ - "biospecimenSubtype" - ] + } }, "then": { "properties": { - "FACSPopulation": { + "KrennSynovitisScore": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "FACSPopulation" + "KrennSynovitisScore" ] } }, { "if": { "properties": { - "biospecimenSubtype": { + "Program": { "enum": [ - "flow-sorted cells" + "AMPAIM" ] } - }, - "required": [ - "biospecimenSubtype" - ] + } }, "then": { "properties": { - "userDefinedCellType": {} + "VisitID": { + "not": { + "type": "null" + } + } }, "required": [ - "userDefinedCellType" + "VisitID" ] } }, { "if": { "properties": { - "biospecimenSubtype": { + "Program": { "enum": [ - "cell suspension" + "AMPRA/SLE" ] } - }, - "required": [ - "biospecimenSubtype" - ] + } }, "then": { "properties": { - "userDefinedCellType": {} + "VisitID": { + "not": { + "type": "null" + } + } }, "required": [ - "userDefinedCellType" + "VisitID" ] } } - ] + ], + "description": "A template outlining metadata to be collected for biospecimen used for an in vitro experiment.", + "properties": { + "AltSampleID": { + "description": "An alternate identifier for a sample. 
With some assays there can be default or alternate sample identifiers entered into the data collection software. If you will be uploading data collected with an alternate identifier please provide that here. In some cases, the ARK BDM team will use this field to capture original but out-dated identifiers for samples.", + "title": "altSampleID", + "type": "string" + }, + "AnatomicalSite": { + "description": "The anatomical site, i.e., location on or within the body, from which the biospecimen was collected. This attribute is required depending on the value selected for biospecimenType.", + "enum": [ + "left 2nd MCP joint", + "left ankle joint", + "left hip joint", + "left knee joint", + "left wrist joint", + "other site", + "right 1st MTP joint", + "right 2nd MCP joint", + "right 2nd MTP joint", + "right 3rd MCP joint", + "right ankle joint", + "right hip joint", + "right knee joint", + "right wrist joint", + "unknown" + ], + "title": "anatomicalSite" + }, + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.", + "title": "biospecimenID", + "type": "string" + }, + "BiospecimenSubtype": { + "description": "Biospecimen status before sample is processed into a scRNA-seq library. Several scRNA-seq technologies support a variety of sample processing methods which can introduce sources of technical variation.", + "enum": [ + "FFPE tissue", + "PFA-fixed tissue", + "cell or tissue lysate", + "cell suspension", + "flow-sorted cells", + "fresh tissue", + "frozen tissue", + "nuclei suspension", + "supernatant" + ], + "title": "biospecimenSubtype" + }, + "BiospecimenType": { + "description": "A label indicating the biological material collected for experimentation and data collection. 
Where applicable, provide all types in a comma-separated list.", + "enum": [ + "PBMCs", + "cell line", + "fibroblast-like synoviocyte", + "kidney biopsy", + "none", + "plasma", + "primary cell culture", + "saliva", + "salivary gland", + "serum", + "skin biopsy", + "skin swab", + "stool", + "suction blister cells", + "suction blister fluid", + "synovial fluid", + "synovial tissue", + "total leukocytes", + "urine", + "uvea", + "whole blood" + ], + "title": "biospecimenType" + }, + "CellOntologyID": { + "description": "Cell Ontology CL identifier that best describes a biospecimen used to generate data, e.g., CL:0000084 for T cell.", + "pattern": "^CL:", + "title": "cellOntologyID", + "type": "string" + }, + "CellType": { + "description": "The cell type name from Cell Ontology for the corresponding CL identifier.", + "title": "cellType", + "type": "string" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "FACSPopulation": { + "description": "A description of the marker gating strategy used to derive the population of cells with FACS.", + "title": "FACSPopulation", + "type": "string" + }, + "IndividualID": { + "description": "Unique identifier assigned to each study participant. For multi-specimen data files provide all IDs in a comma-separated list.", + "title": "individualID", + "type": "string" + }, + "KrennInflammatory": { + "description": "A standardized, semi-quantitative measure of the degree of inflammatory infiltrate in synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. 
If value unknown, enter '-1'.", + "title": "krennInflammatory", + "type": "number" + }, + "KrennLining": { + "description": "A standardized, semi-quantitative measure of the degree of hyperplasia/enlargement of the synovial lining layer of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.", + "title": "krennLining", + "type": "number" + }, + "KrennStroma": { + "description": "A standardized, semi-quantitative measure of stromal cell density of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.", + "title": "krennStroma", + "type": "number" + }, + "KrennSynovitisScore": { + "description": "The Krenn Synovitis Score (KSS) is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.", + "title": "krennSynovitisScore", + "type": "number" + }, + "Notes": { + "description": "Please use this attribute to capture pertinent details not otherwise captured. 
This is an unstructured text entry, please be concise.", + "title": "notes", + "type": "string" + }, + "ParentBiospecimenID": { + "description": "The biospecimenID associated with the originating biospecimen for derived or child biospecimens.", + "title": "parentBiospecimenID", + "type": "string" + }, + "PrimaryCellSource": { + "description": "A label indicating the biological source material from which a primary cell culture was derived.", + "enum": [ + "PBMCs", + "kidney", + "pannus-derived dermis", + "pannus-derived epidermis", + "salivary gland", + "synovial tissue", + "total leukocytes", + "urine", + "uvea", + "whole blood" + ], + "title": "primaryCellSource" + }, + "Program": { + "description": "Name of the funding program that supported the generation of data and associated files", + "enum": [ + "AMP AIM", + "AMP RA/SLE", + "Community Contribution" + ], + "title": "program" + }, + "Project": { + "description": "A sub-level attribute of `program` specifying a research initiative working to investigate particular hypotheses.", + "enum": [ + "AIM for RA", + "ELLIPSS", + "LOCKIT", + "RA", + "SLE", + "STAMP", + "UMass V-CoRT" + ], + "title": "project" + }, + "SalivaCollectionProcedure": { + "description": "Classification of saliva collection procedure. 
This is a conditionally dependent attribute triggered for saliva `biospecimenType`.", + "enum": [ + "stimulated", + "unstimulated" + ], + "title": "salivaCollectionProcedure" + }, + "SampleCollectionBatch": { + "description": "A label indicating batching of sample collection or experiment execution that occurs prior to data collection.", + "title": "sampleCollectionBatch", + "type": "string" + }, + "SkinSiteStatus": { + "description": "Disease manifestation status of skin biospecimen.", + "enum": [ + "healthy control", + "lesional", + "lesional proximal", + "non-lesional" + ], + "title": "skinSiteStatus" + }, + "SynovialCollectionProcedure": { + "description": "Classification of procedure for synovial tissue collection.", + "enum": [ + "arthroplasty", + "biopsy", + "synovectomy", + "unknown" + ], + "title": "synovialCollectionProcedure" + }, + "Treatment": { + "description": "A short descriptive label indicating what experimental treatments have been applied to a biospecimen before data capture. Please include the word 'control' to indicate specific treatments that are intended to serve as a control group.", + "title": "treatment", + "type": "string" + }, + "TreatmentTimepoint": { + "description": "Where applicable, specify the timepoint relative to treatment to distinguish different sets of samples that have undergone the same treatment but for different lengths of time. REQUIRED: please specify the unit of time, e.g., secs, mins, hrs, days, etc.", + "title": "treatmentTimepoint", + "type": "string" + }, + "UserDefinedCellType": { + "description": "User-defined label of a cell type. 
Contributors are provided this option to use preferred or custom cell type labels that may vary from options and standards set by Cell Ontology.", + "title": "userDefinedCellType", + "type": "string" + }, + "VisitID": { + "description": "Ordinal ID distinguishing different patient visits.", + "title": "visitID", + "type": "string" + } + }, + "required": [ + "BiospecimenID", + "BiospecimenType", + "Component", + "Program", + "Project", + "Treatment" + ], + "title": "InVitroBiospecimenMetadataTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.OlinkAssayMetadataTemplate.schema.json b/model_json_schema/ark.OlinkAssayMetadataTemplate.schema.json index 735b1cd6..ee3560f8 100644 --- a/model_json_schema/ark.OlinkAssayMetadataTemplate.schema.json +++ b/model_json_schema/ark.OlinkAssayMetadataTemplate.schema.json @@ -1,64 +1,65 @@ { + "$id": "http://example.com/OlinkAssayMetadataTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", + "description": "A template outlining assay metadata to be collected for each plate in an Olink dataset.", "properties": { - "targetPanelSynID": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "targetPanel": { - "not": { - "type": "null" - }, - "minLength": 1 + "Assay": { + "description": "The technology used to generate the data in this file.", + "enum": [ + "Olink Explore HT", + "Olink Flex", + "Olink Focus", + "Olink Reveal", + "Olink Target 48", + "Olink Target 96" + ], + "title": "assay" }, "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "targetPanelSize": {}, - "platform": { - "type": "array", - "items": { - "enum": [ - "Illumina NovaSeq 6000", - "Fluidigm BioMark", - "Olink Signature Q100", - "unknown", - "Illumina NextSeq 500", - "" - ] - }, - "maxItems": 5 + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": 
"string" }, - "plateID": { - "not": { - "type": "null" - }, - "minLength": 1 + "PlateID": { + "description": "An identifier assigned to a multi-well plate. Certain data types and assays profile samples using multi-well plates. Knowing which samples were profiled on each plate is important for establishing sample provenance, finding the right data files for a specific set of samples, as well as downstream exploratory data analysis and QC work particularly regarding identification and correction of batch effects.", + "title": "plateID", + "type": "string" }, - "assay": { + "Platform": { + "description": "The specific instrument (manufacturer, model, etc.) that was used to carry out a laboratory or computational experiment.", "enum": [ - "Olink Flex", - "Olink Target 96", - "Olink Focus", - "Olink Reveal", - "Olink Explore HT", - "Olink Target 48" - ] + "Fluidigm BioMark", + "Illumina NextSeq 500", + "Illumina NovaSeq 6000", + "Olink Signature Q100", + "unknown" + ], + "title": "platform" + }, + "TargetPanel": { + "description": "A unique or established human-readable name assigned to the panel of targets profiled in the experiment. For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.", + "title": "targetPanel", + "type": "string" + }, + "TargetPanelSize": { + "description": "The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).", + "title": "targetPanelSize", + "type": "integer" + }, + "TargetPanelSynID": { + "description": "In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. 
This attribute links experimental data files to the target panel metadata via the synapse ID of that file.", + "pattern": "^syn[0-9]{8}", + "title": "targetPanelSynID", + "type": "string" } }, "required": [ - "targetPanelSynID", - "targetPanel", + "Assay", "Component", - "plateID", - "assay" - ] + "PlateID", + "TargetPanel", + "TargetPanelSynID" + ], + "title": "OlinkAssayMetadataTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.OlinkFileAnnotationTemplate.schema.json b/model_json_schema/ark.OlinkFileAnnotationTemplate.schema.json index 652bfc34..1b3db053 100644 --- a/model_json_schema/ark.OlinkFileAnnotationTemplate.schema.json +++ b/model_json_schema/ark.OlinkFileAnnotationTemplate.schema.json @@ -1,78 +1,83 @@ { + "$id": "http://example.com/OlinkFileAnnotationTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", + "description": "A template outlining metadata to be provided by contributors that is applied as a preliminary set of annotations to Olink dataset files.", "properties": { - "fileFormat": { + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "FileFormat": { + "description": "Standard file format name or file extension", "enum": [ - "csv", - "geojson", - "svs", - "mtx", - "fastq", - "fcs", - "xlsx", - "tgz", - "czi", - "tsv", "bai", + "bam", "bed", - "pdf", + "bim", + "csv", + "czi", "docx", - "tbi", - "zip", + "dose", "erate", - "rds", + "fam", + "fastq", + "fcs", + "geojson", "h5", - "vcf", - "parquet", - "xls", - "rec", "h5ad", + "info", "mcd", - "txt", + "mtx", + "parquet", + "pdf", "py", - "bam", - "bim", - "dose", - "info", - "fam" - ] - }, - "specimenModality": { - "enum": [ - "unknown", - "multispecimen", - "single specimen" - ] + "rds", + "rec", + "svs", + "tbi", + "tgz", + "tsv", + "txt", + "vcf", + "xls", + "xlsx", + 
"zip" + ], + "title": "fileFormat" }, - "plateID": { - "not": { - "type": "null" - }, - "minLength": 1 + "PlateID": { + "description": "An identifier assigned to a multi-well plate. Certain data types and assays profile samples using multi-well plates. Knowing which samples were profiled on each plate is important for establishing sample provenance, finding the right data files for a specific set of samples, as well as downstream exploratory data analysis and QC work particularly regarding identification and correction of batch effects.", + "title": "plateID", + "type": "string" }, - "resourceType": { + "ResourceType": { + "description": "High-level classification of the file content", "enum": [ - "metadata", "code", "experimental data", - "figure" - ] + "figure", + "metadata" + ], + "title": "resourceType" }, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 + "SpecimenModality": { + "description": "Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens", + "enum": [ + "multispecimen", + "single specimen", + "unknown" + ], + "title": "specimenModality" } }, "required": [ - "fileFormat", - "specimenModality", - "plateID", - "resourceType", - "Component" - ] + "Component", + "FileFormat", + "PlateID", + "ResourceType", + "SpecimenModality" + ], + "title": "OlinkFileAnnotationTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.PublicationMetadataTemplate.schema.json b/model_json_schema/ark.PublicationMetadataTemplate.schema.json index 8e29be21..2373050f 100644 --- a/model_json_schema/ark.PublicationMetadataTemplate.schema.json +++ b/model_json_schema/ark.PublicationMetadataTemplate.schema.json @@ -1,137 +1,134 @@ { + "$id": "http://example.com/PublicationMetadataTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", - "properties": { - "PMID": { - "not": 
{ - "type": "null" + "allOf": [ + { + "if": { + "properties": { + "Program": { + "enum": [ + "AMP RA/SLE" + ] + } + } }, - "minLength": 1 + "then": { + "properties": { + "ProgramPhase": { + "not": { + "type": "null" + } + } + }, + "required": [ + "ProgramPhase" + ] + } + } + ], + "description": "A template outlining metadata to use as annotations for Publication ‘file’ entities.", + "properties": { + "AssociatedDataset": { + "description": "The synID of a Dataset entity. This serves to link other Synapse entities to Dataset entities. When used to annotate a publication Dataset this attribute should include the synID of the experimental Dataset(s) from which the publication data was derived. Multiple synIDs can be specified using a comma-delimited list.", + "pattern": "^syn[0-9]{8}", + "title": "associatedDataset", + "type": "string" }, - "associatedDataset": {}, "Component": { - "not": { - "type": "null" - }, - "minLength": 1 + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" }, - "publicationDate": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "publicationType": { - "enum": [ - "correction", - "pre-print", - "peer-reviewed" - ] + "DOI": { + "description": "Digital object identifier", + "title": "DOI", + "type": "string" }, - "title": { - "not": { - "type": "null" - }, - "minLength": 1 + "Journal": { + "description": "Journal in which the publication was released", + "title": "journal", + "type": "string" }, - "DOI": { - "not": { - "type": "null" - }, - "minLength": 1 + "PMCID": { + "description": "PubMed Central Identifier, formatted as a compact URI string that will automatically resolve into a URL based on the Synapse bioregistry; e.g., pmc:PMCxxxxxxxx", + "pattern": "^pmc:PMC[0-9]{8}", + "title": "PMCID", + "type": "string" }, - "PMCID": {}, - "year": { - "not": { - "type": "null" - }, - "minLength": 1 + "PMID": { + "description": "PubMed(R) Identifier", + "pattern": "^PMID", +
"title": "PMID", + "type": "string" }, - "program": { + "Program": { + "description": "Name of the funding program that supported the generation of data and associated files", "enum": [ "AMP AIM", - "Community Contribution", - "AMP RA/SLE" - ] + "AMP RA/SLE", + "Community Contribution" + ], + "title": "program" }, - "journal": { - "not": { - "type": "null" - }, - "minLength": 1 + "ProgramPhase": { + "description": "A label noting which AMP RA/SLE program phase generated the data.", + "enum": [ + "I", + "II" + ], + "title": "programPhase" }, - "project": { - "type": "array", - "items": { - "enum": [ - "ELLIPSS", - "SLE", - "LOCKIT", - "STAMP", - "RA", - "AIM for RA", - "UMass V-CoRT" - ] - }, - "maxItems": 7 + "Project": { + "description": "A sub-level attribute of `program` specifying a research initiative working to investigate particular hypotheses.", + "enum": [ + "AIM for RA", + "ELLIPSS", + "LOCKIT", + "RA", + "SLE", + "STAMP", + "UMass V-CoRT" + ], + "title": "project" }, - "programPhase": { - "type": "array", - "items": { - "enum": [ - "I", - "II", - "" - ] - }, - "maxItems": 2 + "PublicationDate": { + "description": "The publication date extracted from PubMed database", + "title": "publicationDate", + "type": "string" + }, + "PublicationType": { + "description": "General classification of publication.", + "enum": [ + "correction", + "peer-reviewed", + "pre-print" + ], + "title": "publicationType" + }, + "Title": { + "description": "Title of the publication.", + "title": "title", + "type": "string" + }, + "Year": { + "description": "Year (YYYY) in which the paper was published.", + "pattern": "[1-2][0-9]{3}", + "title": "year", + "type": "string" } }, "required": [ - "PMID", "Component", - "publicationDate", - "publicationType", - "title", "DOI", - "year", - "program", - "journal", - "project" + "Journal", + "PMID", + "Program", + "Project", + "PublicationDate", + "PublicationType", + "Title", + "Year" ], - "allOf": [ - { - "if": { - "properties": { - 
"program": { - "enum": [ - "AMP RA/SLE" - ] - } - }, - "required": [ - "program" - ] - }, - "then": { - "properties": { - "programPhase": { - "type": "array", - "items": { - "enum": [ - "I", - "II" - ] - }, - "maxItems": 2 - } - }, - "required": [ - "programPhase" - ] - } - } - ] + "title": "PublicationMetadataTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.ScRNASeqAssayMetadataTemplate.schema.json b/model_json_schema/ark.ScRNASeqAssayMetadataTemplate.schema.json index 04779334..4f98b90a 100644 --- a/model_json_schema/ark.ScRNASeqAssayMetadataTemplate.schema.json +++ b/model_json_schema/ark.ScRNASeqAssayMetadataTemplate.schema.json @@ -1,285 +1,23 @@ { + "$id": "http://example.com/ScRNASeqAssayMetadataTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", - "properties": { - "alignmentReference": { - "enum": [ - "modified GRCh38", - "unknown", - "GRCh38", - "10x Cell Ranger Human GRCh38 2020-A", - "vdj_GRCh38_alts_ensembl-4.0.0", - "10x Cell Ranger Human GRCh38 2024-A" - ] - }, - "sampleProcessingBatch": {}, - "dataCollectionBatch": {}, - "assay": { - "type": "array", - "items": { - "enum": [ - "Olink Reveal", - "Olink Flex", - "snRNASeq", - "Olink Explore HT", - "CyTOF", - "CE-MS", - "WES", - "Xenium", - "feature barcode sequencing", - "flow cytometry", - "RNASeq", - "WGS", - "SomaScan", - "kiloplex", - "GenePS SeqFISH", - "snATACSeq", - "VDJSeq", - "Olink Target 48", - "Olink Focus", - "NULISA", - "SNP array", - "imaging mass spectrometry", - "scRNASeq", - "Olink Target 96", - "H&E", - "LC-MS/MS", - "imaging mass cytometry", - "CosMX", - "multiplexed ELISA", - "serial IHC", - "scVDJSeq", - "CITESeq", - "ASAPSeq", - "Visium" - ] - }, - "maxItems": 34 - }, - "inputCellCount": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "libraryPrepMethod": { - "enum": [ - "10x Chromium GEM-X Single Cell 3' v4", - "Nextera XT", - "10x 
Chromium Next GEM Single Cell 5' v1.1", - "Nextera XT DNA", - "in-house library prep", - "SMART-Seq Human TCR with UMI", - "10x GEM-X Flex Gene Expression Human", - "SMARTer Stranded Total RNA v2", - "10x Chromium Next GEM Single Cell 5' v2", - "10x Chromium Single Cell Human TCR", - "Takara Human BCR profiling for Illumina", - "SMART-Seq Human BCR with UMI", - "TruSeq Stranded mRNA", - "NEBNext Ultra II Directional RNA Library", - "10x Chromium Next GEM Single Cell 3'", - "Fluidigm C1 HT", - "Takara Human TCRv2 profiling for Illumina", - "10x Chromium Next GEM Single Cell 3' 3.1", - "Takara Human scTCR profiling for Illumina", - "custom DASH-treatment", - "10x Chromium Fixed RNA Human Transcriptome", - "QIAseq miRNA Library", - "10x Chromium Single Cell Human BCR", - "CEL-Seq2", - "10x GEM-X Universal 5' Gene Expression v3", - "10x Chromium GEM-X Single Cell 5' v3", - "SMART-Seq v4 Ultra Low Input RNA", - "Chromium Next GEM Single Cell ATAC v1.1", - "10x Chromium Next GEM Single Cell ATAC v2", - "Takara Human TCR profiling for Illumina", - "NEBNext Human Immune Sequencing Kit" - ] - }, - "totalReads": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "sequencingSaturation": {}, - "softwareAndVersion": { - "enum": [ - "Cell Ranger v3.1.0", - "Space Ranger 3.0.1", - "Space Ranger 3.1.2", - "demuxlet", - "Cell Ranger v6.0.1", - "Cell Ranger v7.0.1", - "Cell Ranger ATAC v1.1.0", - "Cell Ranger v6.0.0", - "Cell Ranger v8.0.0", - "Cell Ranger v3.0.1", - "Cell Ranger v5.0.0", - "BD FACSDiva 8.0.1", - "Cell Ranger v3.0.2", - "Cell Ranger v9.0.0", - "Cell Ranger v4.0.0", - "Space Ranger 3.0.0", - "Cell Ranger v6.1.0", - "Space Ranger 3.1.3", - "Cell Ranger v5.0.1", - "Cell Ranger v7.2.0", - "Cell Ranger v6.0.2", - "Cell Ranger v3.0.0", - "Cell Ranger v7.0.0", - "Cell Ranger v7.1.0", - "Cell Ranger v6.1.2", - "Cell Ranger 9.0.1", - "Cell Ranger v8.0.1", - "Space Ranger 3.1.0", - "Space Ranger 3.1.1", - "Cell Ranger v6.1.1", - "" - ] - }, - "specimenModality": { - 
"enum": [ - "unknown", - "single specimen", - "multispecimen" - ] - }, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "platform": { - "type": "array", - "items": { - "enum": [ - "Thermo Fisher Attune NxT", - "BD FACSLyric Clinical", - "BD FACSAria III", - "BD FACSCanto II", - "Chromium Controller", - "Thermo Fisher Attune Xenith", - "Chromium X", - "Cytek Aurora Evo", - "Illumina NovaSeq 6000", - "Xenium", - "BD FACSymphony S6", - "Sony MA900", - "Helios Mass Cytometer", - "Thermo Fisher Attune CytPix", - "Illumina NovaSeq X", - "BD FACSMelody", - "CyTOF XT", - "Chromium GEM-X Single Cell 3' Chip v4", - "Chromium Xo", - "Chromium Next GEM Chip G", - "Illumina HiSeq 2500", - "Chromium Next GEM Chip Q", - "Cytek Aurora", - "Illumina NextSeq 500", - "unknown", - "Chromium Next GEM Chip K", - "Fluidigm BioMark", - "BD FACSCanto", - "BD FACSDiscover A8", - "Chromium iX", - "Illumina HiSeq X Ten", - "Olink Signature Q100", - "none", - "Chromium Next GEM Chip H", - "Chromium Next GEM Chip M", - "GEM-X Flex Gene Expression Chip", - "BD FACSDiscover S8", - "Visium CytAssist", - "Not Applicable", - "GEM-X OCM 5' Chip", - "BD FACSAria Fusion cell sorter", - "Hyperion", - "BD LSRFortessa" - ] - }, - "maxItems": 43 - }, - "nucleicAcidSource": { - "type": "array", - "items": { - "enum": [ - "poly(A) RNA", - "CRISPR protospacer feature barcode", - "gDNA", - "TCR mRNA", - "multiplexing oligo", - "BCR mRNA", - "globin-depleted RNA", - "intracellular protein feature barcode", - "surface protein feature barcode", - "Tn5-accessible gDNA", - "rRNA-depleted RNA", - "antigen capture barcode" - ] - }, - "maxItems": 12 - }, - "percentCellViability": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "10xProbeSetReference": { - "enum": [ - "Flex Human Transcriptome Probe Set v1.1.0", - "Flex Human Transcriptome Probe Set v1.0.1", - "custom probe set", - "Visium Human Transcriptome v1", - "Visium Human Transcriptome v2", - "" - ] - }, - "biospecimenID": {}, 
- "libraryID": {} - }, - "required": [ - "alignmentReference", - "assay", - "inputCellCount", - "libraryPrepMethod", - "totalReads", - "specimenModality", - "Component", - "platform", - "nucleicAcidSource", - "percentCellViability" - ], "allOf": [ { "if": { "properties": { - "libraryPrepMethod": { + "LibraryPrepMethod": { "enum": [ - "10x GEM-X Flex Gene Expression Human" + "10x GEM-X Flex Gene Expression Human" ] } - }, - "required": [ - "libraryPrepMethod" - ] + } }, "then": { "properties": { "10xProbeSetReference": { - "enum": [ - "Flex Human Transcriptome Probe Set v1.1.0", - "Flex Human Transcriptome Probe Set v1.0.1", - "custom probe set", - "Visium Human Transcriptome v1", - "Visium Human Transcriptome v2" - ] + "not": { + "type": "null" + } } }, "required": [ @@ -290,56 +28,326 @@ { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "single specimen" + "multispecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "biospecimenID": { + "LibraryID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "biospecimenID" + "LibraryID" ] } }, { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "multispecimen" + "single specimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "libraryID": { + "BiospecimenID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "libraryID" + "BiospecimenID" ] } } - ] + ], + "description": "A template outlining metadata to be collected for each library in a scRNA-seq dataset.", + "properties": { + "10xProbeSetReference": { + "description": "Name of probe set used in 10x Chromium Flex, Xenium, or Visium data.
If custom modified probe set was used the probe reference should be included as metadata accompanying the experimental data files.", + "enum": [ + "Flex Human Transcriptome Probe Set v1.0.1", + "Flex Human Transcriptome Probe Set v1.1.0", + "Visium Human Transcriptome v1", + "Visium Human Transcriptome v2", + "custom probe set" + ], + "title": "10xProbeSetReference" + }, + "AlignmentReference": { + "description": "The genomic/transcriptomic reference used for performing read alignment against.", + "enum": [ + "10x Cell Ranger Human GRCh38 2020-A", + "10x Cell Ranger Human GRCh38 2024-A", + "GRCh38", + "modified GRCh38", + "unknown", + "vdj_GRCh38_alts_ensembl-4.0.0" + ], + "title": "alignmentReference" + }, + "Assay": { + "description": "The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.", + "enum": [ + "ASAPSeq", + "CE-MS", + "CITESeq", + "CosMX", + "CyTOF", + "GenePS SeqFISH", + "H&E", + "LC-MS/MS", + "NULISA", + "Olink Explore HT", + "Olink Flex", + "Olink Focus", + "Olink Reveal", + "Olink Target 48", + "Olink Target 96", + "RNASeq", + "SNP array", + "SomaScan", + "VDJSeq", + "Visium", + "WES", + "WGS", + "Xenium", + "feature barcode sequencing", + "flow cytometry", + "imaging mass cytometry", + "imaging mass spectrometry", + "kiloplex", + "multiplexed ELISA", + "scRNASeq", + "scVDJSeq", + "serial IHC", + "snATACSeq", + "snRNASeq" + ], + "title": "assay" + }, + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. 
For multi-specimen data files provide all IDs in a comma-separated list.", + "title": "biospecimenID", + "type": "string" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "DataCollectionBatch": { + "description": "A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.", + "title": "dataCollectionBatch", + "type": "string" + }, + "InputCellCount": { + "description": "An estimate of the number of cells expected to be sequenced in a library. Software that processes single-cell sequencing data often includes options for users to specify this value to improve processing results.", + "title": "inputCellCount", + "type": "integer" + }, + "LibraryID": { + "description": "A library label or name, unique within an experiment, used to distinguish sequencing libraries.", + "title": "libraryID", + "type": "string" + }, + "LibraryPrepMethod": { + "description": "Sequencing library preparation method or kit used to create the library.
If no commercially available kit was used, please select 'in-house library prep'.", + "enum": [ + "10x Chromium Fixed RNA Human Transcriptome", + "10x Chromium GEM-X Single Cell 3' v4", + "10x Chromium GEM-X Single Cell 5' v3", + "10x Chromium Next GEM Single Cell 3'", + "10x Chromium Next GEM Single Cell 3' 3.1", + "10x Chromium Next GEM Single Cell 5' v1.1", + "10x Chromium Next GEM Single Cell 5' v2", + "10x Chromium Next GEM Single Cell ATAC v2", + "10x Chromium Single Cell Human BCR", + "10x Chromium Single Cell Human TCR", + "10x GEM-X Flex Gene Expression Human", + "10x GEM-X Universal 5' Gene Expression v3", + "CEL-Seq2", + "Chromium Next GEM Single Cell ATAC v1.1", + "Fluidigm C1 HT", + "NEBNext Human Immune Sequencing Kit", + "NEBNext Ultra II Directional RNA Library", + "Nextera XT", + "Nextera XT DNA", + "QIAseq miRNA Library", + "SMART-Seq Human BCR with UMI", + "SMART-Seq Human TCR with UMI", + "SMART-Seq v4 Ultra Low Input RNA", + "SMARTer Stranded Total RNA v2", + "Takara Human BCR profiling for Illumina", + "Takara Human TCR profiling for Illumina", + "Takara Human TCRv2 profiling for Illumina", + "Takara Human scTCR profiling for Illumina", + "TruSeq Stranded mRNA", + "custom DASH-treatment", + "in-house library prep" + ], + "title": "libraryPrepMethod" + }, + "NucleicAcidSource": { + "description": "The source of the nucleic acid used as input for sequencing library fragments. Select all that apply, though in most cases only a single label is expected.", + "enum": [ + "BCR mRNA", + "CRISPR protospacer feature barcode", + "TCR mRNA", + "Tn5-accessible gDNA", + "antigen capture barcode", + "gDNA", + "globin-depleted RNA", + "intracellular protein feature barcode", + "multiplexing oligo", + "poly(A) RNA", + "rRNA-depleted RNA", + "surface protein feature barcode" + ], + "title": "nucleicAcidSource" + }, + "PercentCellViability": { + "description": "A measure of the proportion of viable cells within a cell suspension. 
Scale is 0-100.", + "maximum": 100.0, + "minimum": 50.0, + "title": "percentCellViability", + "type": "integer" + }, + "Platform": { + "description": "The specific version (manufacturer, model, etc.) of a technology that is used to carry out a laboratory or computational experiment. Specify where applicable for experimental data files, else enter 'none'. In most cases only a single label is expected, however multiple selections can be provided in comma-delimited list where applicable e.g., for 10x Genomics fastq files please specify both the 10x instrument and the sequencing platform.", + "enum": [ + "BD FACSAria Fusion cell sorter", + "BD FACSAria III", + "BD FACSCanto", + "BD FACSCanto II", + "BD FACSDiscover A8", + "BD FACSDiscover S8", + "BD FACSLyric Clinical", + "BD FACSMelody", + "BD FACSymphony S6", + "BD LSRFortessa", + "Chromium Controller", + "Chromium GEM-X Single Cell 3' Chip v4", + "Chromium Next GEM Chip G", + "Chromium Next GEM Chip H", + "Chromium Next GEM Chip K", + "Chromium Next GEM Chip M", + "Chromium Next GEM Chip Q", + "Chromium X", + "Chromium Xo", + "Chromium iX", + "CyTOF XT", + "Cytek Aurora", + "Cytek Aurora Evo", + "Fluidigm BioMark", + "GEM-X Flex Gene Expression Chip", + "GEM-X OCM 5' Chip", + "Helios Mass Cytometer", + "Hyperion", + "Illumina HiSeq 2500", + "Illumina HiSeq X Ten", + "Illumina NextSeq 500", + "Illumina NovaSeq 6000", + "Illumina NovaSeq X", + "Not Applicable", + "Olink Signature Q100", + "Sony MA900", + "Thermo Fisher Attune CytPix", + "Thermo Fisher Attune NxT", + "Thermo Fisher Attune Xenith", + "Visium CytAssist", + "Xenium", + "none", + "unknown" + ], + "title": "platform" + }, + "SampleProcessingBatch": { + "description": "A label indicating batching of sample processing or preparation that occurs prior to data collection.", + "title": "sampleProcessingBatch", + "type": "string" + }, + "SequencingSaturation": { + "description": "A measure of the fraction of library complexity that was sequenced in a library. 
This metric quantifies the fraction of reads originating from an already-observed UMI. More specifically, this is the fraction of confidently mapped, valid cell-barcode, valid UMI reads that are non-unique. Scale is 0-1.", + "maximum": 1.0, + "minimum": 0.0, + "title": "sequencingSaturation", + "type": "number" + }, + "SoftwareAndVersion": { + "description": "Relevant software and version used to generate the data file.", + "enum": [ + "BD FACSDiva 8.0.1", + "Cell Ranger 9.0.1", + "Cell Ranger ATAC v1.1.0", + "Cell Ranger v3.0.0", + "Cell Ranger v3.0.1", + "Cell Ranger v3.0.2", + "Cell Ranger v3.1.0", + "Cell Ranger v4.0.0", + "Cell Ranger v5.0.0", + "Cell Ranger v5.0.1", + "Cell Ranger v6.0.0", + "Cell Ranger v6.0.1", + "Cell Ranger v6.0.2", + "Cell Ranger v6.1.0", + "Cell Ranger v6.1.1", + "Cell Ranger v6.1.2", + "Cell Ranger v7.0.0", + "Cell Ranger v7.0.1", + "Cell Ranger v7.1.0", + "Cell Ranger v7.2.0", + "Cell Ranger v8.0.0", + "Cell Ranger v8.0.1", + "Cell Ranger v9.0.0", + "Space Ranger 3.0.0", + "Space Ranger 3.0.1", + "Space Ranger 3.1.0", + "Space Ranger 3.1.1", + "Space Ranger 3.1.2", + "Space Ranger 3.1.3", + "demuxlet" + ], + "title": "softwareAndVersion" + }, + "SpecimenModality": { + "description": "Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens", + "enum": [ + "multispecimen", + "single specimen", + "unknown" + ], + "title": "specimenModality" + }, + "TotalReads": { + "description": "Total number of reads sequenced from the library.", + "title": "totalReads", + "type": "integer" + } + }, + "required": [ + "AlignmentReference", + "Assay", + "Component", + "InputCellCount", + "LibraryPrepMethod", + "NucleicAcidSource", + "PercentCellViability", + "Platform", + "SpecimenModality", + "TotalReads" + ], + "title": "ScRNASeqAssayMetadataTemplate", + "type": "object" } \ No newline at end of file diff --git 
a/model_json_schema/ark.ScRNASeqProcessedDataAnnotationTemplate.schema.json b/model_json_schema/ark.ScRNASeqProcessedDataAnnotationTemplate.schema.json index c50476a0..bee16879 100644 --- a/model_json_schema/ark.ScRNASeqProcessedDataAnnotationTemplate.schema.json +++ b/model_json_schema/ark.ScRNASeqProcessedDataAnnotationTemplate.schema.json @@ -1,387 +1,370 @@ { + "$id": "http://example.com/ScRNASeqProcessedDataAnnotationTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", - "properties": { - "resourceType": { - "type": "array", - "items": { - "enum": [ - "metadata", - "experimental data" - ] - }, - "maxItems": 2 - }, - "assay": { - "type": "array", - "items": { - "enum": [ - "NULISA", - "Olink Target 96", - "serial IHC", - "scRNASeq", - "imaging mass cytometry", - "Olink Target 48", - "Olink Focus", - "CyTOF", - "ASAPSeq", - "Olink Flex", - "H&E", - "flow cytometry", - "SomaScan", - "CosMX", - "Xenium", - "kiloplex", - "CITESeq", - "VDJSeq", - "Olink Reveal", - "Olink Explore HT", - "SNP array", - "feature barcode sequencing", - "LC-MS/MS", - "Visium", - "CE-MS", - "imaging mass spectrometry", - "WES", - "scVDJSeq", - "multiplexed ELISA", - "snATACSeq", - "snRNASeq", - "RNASeq", - "GenePS SeqFISH", - "WGS" - ] - }, - "maxItems": 34 - }, - "fileFormat": { - "enum": [ - "bai", - "fam", - "bim", - "bam", - "mtx", - "tgz", - "xlsx", - "xls", - "txt", - "bed", - "h5", - "tsv", - "csv", - "h5ad", - "Rds", - "zip" - ] - }, - "dataLevel": { - "enum": [ - "1", - "2", - "5", - "3", - "4" - ] - }, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "cellRangerOutput": { - "enum": [ - "raw MEX", - "filtered_peak_bc_matrix", - "filtered_feature_bc_matrix", - "Not Applicable", - "raw_feature_bc_matrix", - "raw_peak_bc_matrix", - "filtered MEX" - ] - }, - "specimenModality": { - "enum": [ - "multispecimen", - "single specimen", - "unknown" - ] - }, - "metadataType": { - 
"enum": [ - "single-cell metadata", - "" - ] - }, - "processedDataType": { - "type": "array", - "items": { - "enum": [ - "differential expression results", - "epigenomic peaks", - "gene counts", - "barcode counts", - "" - ] - }, - "maxItems": 4 - }, - "targetPanelSize": {}, - "targetPanelSynID": {}, - "targetPanel": {}, - "RObjectClass": { - "enum": [ - "Symphony reference", - "vector", - "data.frame", - "sparse matrix", - "ROCR prediction.object", - "list", - "matrix", - "SummarizedExperiment", - "Seurat object", - "" - ] - }, - "individualID": {}, - "biospecimenID": {} - }, - "required": [ - "resourceType", - "assay", - "fileFormat", - "dataLevel", - "Component", - "cellRangerOutput", - "specimenModality" - ], "allOf": [ { "if": { "properties": { - "resourceType": { + "Assay": { "enum": [ - "metadata" + "Featurebarcodesequencing" ] } - }, - "required": [ - "resourceType" - ] + } }, "then": { "properties": { - "metadataType": { - "enum": [ - "single-cell metadata" - ] + "TargetPanel": { + "not": { + "type": "null" + } } }, "required": [ - "metadataType" + "TargetPanel" ] } }, { "if": { "properties": { - "resourceType": { + "Assay": { "enum": [ - "experimental data" + "Featurebarcodesequencing" ] } - }, - "required": [ - "resourceType" - ] + } }, "then": { "properties": { - "processedDataType": { - "type": "array", - "items": { - "enum": [ - "differential expression results", - "epigenomic peaks", - "gene counts", - "barcode counts", - "" - ] - }, - "maxItems": 4 + "TargetPanelSize": { + "not": { + "type": "null" + } } }, "required": [ - "processedDataType" + "TargetPanelSize" ] } }, { "if": { "properties": { - "assay": { + "Assay": { "enum": [ - "feature barcode sequencing" + "Featurebarcodesequencing" ] } - }, - "required": [ - "assay" - ] + } }, "then": { "properties": { - "targetPanelSize": {} + "TargetPanelSynID": { + "not": { + "type": "null" + } + } }, "required": [ - "targetPanelSize" + "TargetPanelSynID" ] } }, { "if": { "properties": { - "assay": { + 
"FileFormat": { "enum": [ - "feature barcode sequencing" + "Rds" ] } - }, - "required": [ - "assay" - ] + } }, "then": { "properties": { - "targetPanelSynID": { + "RObjectClass": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "targetPanelSynID" + "RObjectClass" ] } }, { "if": { "properties": { - "assay": { + "ResourceType": { "enum": [ - "feature barcode sequencing" + "Experimentaldata" ] } - }, - "required": [ - "assay" - ] + } }, "then": { "properties": { - "targetPanel": { + "ProcessedDataType": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "targetPanel" + "ProcessedDataType" ] } }, { "if": { "properties": { - "fileFormat": { + "ResourceType": { "enum": [ - "Rds" + "Metadata" ] } - }, - "required": [ - "fileFormat" - ] + } }, "then": { "properties": { - "RObjectClass": { - "enum": [ - "Symphony reference", - "vector", - "data.frame", - "sparse matrix", - "ROCR prediction.object", - "list", - "matrix", - "SummarizedExperiment", - "Seurat object", - "" - ] + "MetadataType": { + "not": { + "type": "null" + } } }, "required": [ - "RObjectClass" + "MetadataType" ] } }, { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "single specimen" + "Singlespecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "individualID": { + "BiospecimenID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "individualID" + "BiospecimenID" ] } }, { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "single specimen" + "Singlespecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "biospecimenID": { + "IndividualID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "biospecimenID" + "IndividualID" ] } } - ] + ], + "description": "A data contributor template outlining metadata to be collected as file annotations for scRNA-seq processed data files 
(i.e., anything not a fastq file).", + "properties": { + "Assay": { + "description": "The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.", + "enum": [ + "ASAPSeq", + "CE-MS", + "CITESeq", + "CosMX", + "CyTOF", + "GenePS SeqFISH", + "H&E", + "LC-MS/MS", + "NULISA", + "Olink Explore HT", + "Olink Flex", + "Olink Focus", + "Olink Reveal", + "Olink Target 48", + "Olink Target 96", + "RNASeq", + "SNP array", + "SomaScan", + "VDJSeq", + "Visium", + "WES", + "WGS", + "Xenium", + "feature barcode sequencing", + "flow cytometry", + "imaging mass cytometry", + "imaging mass spectrometry", + "kiloplex", + "multiplexed ELISA", + "scRNASeq", + "scVDJSeq", + "serial IHC", + "snATACSeq", + "snRNASeq" + ], + "title": "assay" + }, + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.", + "title": "biospecimenID", + "type": "string" + }, + "CellRangerOutput": { + "description": "10x Genomics Cell Ranger software output several different counts results and formats, some with different processing applied. This label distinguishes between these types and is particularly helpful when multiple files are uploaded with the sample name, e.g., barcodes.tsv.gz", + "enum": [ + "Not Applicable", + "filtered MEX", + "filtered_feature_bc_matrix", + "filtered_peak_bc_matrix", + "raw MEX", + "raw_feature_bc_matrix", + "raw_peak_bc_matrix" + ], + "title": "cellRangerOutput" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "DataLevel": { + "description": "Level of data processing applied to file. 
Levels refer to pre-defined standards of processing for the given assay.", + "enum": [ + "1", + "2", + "3", + "4", + "5" + ], + "title": "dataLevel" + }, + "FileFormat": { + "description": "Standard file format name or file extension", + "enum": [ + "Rds", + "bai", + "bam", + "bed", + "bim", + "csv", + "fam", + "h5", + "h5ad", + "mtx", + "tgz", + "tsv", + "txt", + "xls", + "xlsx", + "zip" + ], + "title": "fileFormat" + }, + "IndividualID": { + "description": "Unique identifier assigned to each study participant. For multi-specimen data files provide all IDs in a comma-separated list.", + "title": "individualID", + "type": "string" + }, + "MetadataType": { + "description": "A label further classifying the content of metadata resource.", + "enum": [ + "single-cell metadata" + ], + "title": "metadataType" + }, + "ProcessedDataType": { + "description": "A label used for file annotations to provide a brief description of the processed data file.", + "enum": [ + "barcode counts", + "differential expression results", + "epigenomic peaks", + "gene counts" + ], + "title": "processedDataType" + }, + "RObjectClass": { + "description": "Rds files store R objects, one per file. 
This label details the class of the R object saved to the Rds file or other similar file types.", + "enum": [ + "ROCR prediction.object", + "Seurat object", + "SummarizedExperiment", + "Symphony reference", + "data.frame", + "list", + "matrix", + "sparse matrix", + "vector" + ], + "title": "RObjectClass" + }, + "ResourceType": { + "description": "High-level classification of the file content", + "enum": [ + "experimental data", + "metadata" + ], + "title": "resourceType" + }, + "SpecimenModality": { + "description": "Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens", + "enum": [ + "multispecimen", + "single specimen", + "unknown" + ], + "title": "specimenModality" + }, + "TargetPanel": { + "description": "A unique or established human-readable name assigned to the panel of targets profiled in the experiment. For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.", + "title": "targetPanel", + "type": "string" + }, + "TargetPanelSize": { + "description": "The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).", + "title": "targetPanelSize", + "type": "integer" + }, + "TargetPanelSynID": { + "description": "In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. 
This attribute links experimental data files to the target panel metadata via the synapse ID of that file.", + "pattern": "^syn[0-9]{8}", + "title": "targetPanelSynID", + "type": "string" + } + }, + "required": [ + "Assay", + "CellRangerOutput", + "Component", + "DataLevel", + "FileFormat", + "ResourceType", + "SpecimenModality" + ], + "title": "ScRNASeqProcessedDataAnnotationTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.ScVDJSeqProcessedDataAnnotationTemplate.schema.json b/model_json_schema/ark.ScVDJSeqProcessedDataAnnotationTemplate.schema.json index f7083bd3..14ed0f02 100644 --- a/model_json_schema/ark.ScVDJSeqProcessedDataAnnotationTemplate.schema.json +++ b/model_json_schema/ark.ScVDJSeqProcessedDataAnnotationTemplate.schema.json @@ -1,375 +1,356 @@ { + "$id": "http://example.com/ScVDJSeqProcessedDataAnnotationTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", - "properties": { - "dataLevel": { - "enum": [ - "2", - "5", - "4", - "3", - "1" - ] - }, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "specimenModality": { - "enum": [ - "multispecimen", - "unknown", - "single specimen" - ] - }, - "fileFormat": { - "enum": [ - "h5ad", - "tsv", - "bai", - "zip", - "bam", - "fam", - "bim", - "mtx", - "Rds", - "h5", - "xlsx", - "xls", - "tgz", - "txt", - "bed", - "csv" - ] - }, - "assay": { - "type": "array", - "items": { - "enum": [ - "NULISA", - "SNP array", - "snRNASeq", - "snATACSeq", - "imaging mass spectrometry", - "scRNASeq", - "kiloplex", - "Olink Target 48", - "Visium", - "Olink Focus", - "imaging mass cytometry", - "serial IHC", - "RNASeq", - "Xenium", - "WES", - "flow cytometry", - "Olink Flex", - "CITESeq", - "GenePS SeqFISH", - "H&E", - "Olink Explore HT", - "Olink Reveal", - "feature barcode sequencing", - "scVDJSeq", - "CosMX", - "LC-MS/MS", - "CE-MS", - "Olink Target 96", - "VDJSeq", - 
"ASAPSeq", - "multiplexed ELISA", - "SomaScan", - "WGS", - "CyTOF" - ] - }, - "maxItems": 34 - }, - "resourceType": { - "type": "array", - "items": { - "enum": [ - "metadata", - "experimental data" - ] - }, - "maxItems": 2 - }, - "biospecimenID": {}, - "individualID": {}, - "RObjectClass": { - "enum": [ - "vector", - "ROCR prediction.object", - "SummarizedExperiment", - "matrix", - "Symphony reference", - "sparse matrix", - "list", - "data.frame", - "Seurat object", - "" - ] - }, - "targetPanelSize": {}, - "targetPanelSynID": {}, - "targetPanel": {}, - "metadataType": { - "enum": [ - "single-cell metadata", - "" - ] - }, - "processedDataType": { - "type": "array", - "items": { - "enum": [ - "barcode counts", - "gene counts", - "differential expression results", - "epigenomic peaks", - "" - ] - }, - "maxItems": 4 - } - }, - "required": [ - "dataLevel", - "Component", - "specimenModality", - "fileFormat", - "assay", - "resourceType" - ], "allOf": [ { "if": { "properties": { - "specimenModality": { + "Assay": { "enum": [ - "single specimen" + "Featurebarcodesequencing" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "biospecimenID": { + "TargetPanel": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "biospecimenID" + "TargetPanel" ] } }, { "if": { "properties": { - "specimenModality": { + "Assay": { "enum": [ - "single specimen" + "Featurebarcodesequencing" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "individualID": { + "TargetPanelSize": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "individualID" + "TargetPanelSize" ] } }, { "if": { "properties": { - "fileFormat": { + "Assay": { "enum": [ - "Rds" + "Featurebarcodesequencing" ] } - }, - "required": [ - "fileFormat" - ] + } }, "then": { "properties": { - "RObjectClass": { - "enum": [ - "vector", - "ROCR prediction.object", - "SummarizedExperiment", - "matrix", - "Symphony reference", - 
"sparse matrix", - "list", - "data.frame", - "Seurat object", - "" - ] + "TargetPanelSynID": { + "not": { + "type": "null" + } } }, "required": [ - "RObjectClass" + "TargetPanelSynID" ] } }, { "if": { "properties": { - "assay": { + "FileFormat": { "enum": [ - "feature barcode sequencing" + "Rds" ] } - }, - "required": [ - "assay" - ] + } }, "then": { "properties": { - "targetPanelSize": {} + "RObjectClass": { + "not": { + "type": "null" + } + } }, "required": [ - "targetPanelSize" + "RObjectClass" ] } }, { "if": { "properties": { - "assay": { + "ResourceType": { "enum": [ - "feature barcode sequencing" + "Experimentaldata" ] } - }, - "required": [ - "assay" - ] + } }, "then": { "properties": { - "targetPanelSynID": { + "ProcessedDataType": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "targetPanelSynID" + "ProcessedDataType" ] } }, { "if": { "properties": { - "assay": { + "ResourceType": { "enum": [ - "feature barcode sequencing" + "Metadata" ] } - }, - "required": [ - "assay" - ] + } }, "then": { "properties": { - "targetPanel": { + "MetadataType": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "targetPanel" + "MetadataType" ] } }, { "if": { "properties": { - "resourceType": { + "SpecimenModality": { "enum": [ - "metadata" + "Singlespecimen" ] } - }, - "required": [ - "resourceType" - ] + } }, "then": { "properties": { - "metadataType": { - "enum": [ - "single-cell metadata" - ] + "BiospecimenID": { + "not": { + "type": "null" + } } }, "required": [ - "metadataType" + "BiospecimenID" ] } }, { "if": { "properties": { - "resourceType": { + "SpecimenModality": { "enum": [ - "experimental data" + "Singlespecimen" ] } - }, - "required": [ - "resourceType" - ] + } }, "then": { "properties": { - "processedDataType": { - "type": "array", - "items": { - "enum": [ - "barcode counts", - "gene counts", - "differential expression results", - "epigenomic peaks", - "" - ] - }, - "maxItems": 4 + "IndividualID": { + "not": { + 
"type": "null" + } } }, "required": [ - "processedDataType" + "IndividualID" ] } } - ] + ], + "description": "A data contributor template outlining metadata to be collected as file annotations for scVDJ-seq (i.e., immune repertoire profiling) processed data files (i.e., anything not a fastq file).", + "properties": { + "Assay": { + "description": "The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.", + "enum": [ + "ASAPSeq", + "CE-MS", + "CITESeq", + "CosMX", + "CyTOF", + "GenePS SeqFISH", + "H&E", + "LC-MS/MS", + "NULISA", + "Olink Explore HT", + "Olink Flex", + "Olink Focus", + "Olink Reveal", + "Olink Target 48", + "Olink Target 96", + "RNASeq", + "SNP array", + "SomaScan", + "VDJSeq", + "Visium", + "WES", + "WGS", + "Xenium", + "feature barcode sequencing", + "flow cytometry", + "imaging mass cytometry", + "imaging mass spectrometry", + "kiloplex", + "multiplexed ELISA", + "scRNASeq", + "scVDJSeq", + "serial IHC", + "snATACSeq", + "snRNASeq" + ], + "title": "assay" + }, + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.", + "title": "biospecimenID", + "type": "string" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "DataLevel": { + "description": "Level of data processing applied to file. 
Levels refer to pre-defined standards of processing for the given assay.", + "enum": [ + "1", + "2", + "3", + "4", + "5" + ], + "title": "dataLevel" + }, + "FileFormat": { + "description": "Standard file format name or file extension", + "enum": [ + "Rds", + "bai", + "bam", + "bed", + "bim", + "csv", + "fam", + "h5", + "h5ad", + "mtx", + "tgz", + "tsv", + "txt", + "xls", + "xlsx", + "zip" + ], + "title": "fileFormat" + }, + "IndividualID": { + "description": "Unique identifier assigned to each study participant. For multi-specimen data files provide all IDs in a comma-separated list.", + "title": "individualID", + "type": "string" + }, + "MetadataType": { + "description": "A label further classifying the content of metadata resource.", + "enum": [ + "single-cell metadata" + ], + "title": "metadataType" + }, + "ProcessedDataType": { + "description": "A label used for file annotations to provide a brief description of the processed data file.", + "enum": [ + "barcode counts", + "differential expression results", + "epigenomic peaks", + "gene counts" + ], + "title": "processedDataType" + }, + "RObjectClass": { + "description": "Rds files store R objects, one per file. 
This label details the class of the R object saved to the Rds file or other similar file types.", + "enum": [ + "ROCR prediction.object", + "Seurat object", + "SummarizedExperiment", + "Symphony reference", + "data.frame", + "list", + "matrix", + "sparse matrix", + "vector" + ], + "title": "RObjectClass" + }, + "ResourceType": { + "description": "High-level classification of the file content", + "enum": [ + "experimental data", + "metadata" + ], + "title": "resourceType" + }, + "SpecimenModality": { + "description": "Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens", + "enum": [ + "multispecimen", + "single specimen", + "unknown" + ], + "title": "specimenModality" + }, + "TargetPanel": { + "description": "A unique or established human-readable name assigned to the panel of targets profiled in the experiment. For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.", + "title": "targetPanel", + "type": "string" + }, + "TargetPanelSize": { + "description": "The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).", + "title": "targetPanelSize", + "type": "integer" + }, + "TargetPanelSynID": { + "description": "In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. 
This attribute links experimental data files to the target panel metadata via the synapse ID of that file.", + "pattern": "^syn[0-9]{8}", + "title": "targetPanelSynID", + "type": "string" + } + }, + "required": [ + "Assay", + "Component", + "DataLevel", + "FileFormat", + "ResourceType", + "SpecimenModality" + ], + "title": "ScVDJSeqProcessedDataAnnotationTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.SnATAC-seqAssayMetadataTemplate.schema.json b/model_json_schema/ark.SnATAC-seqAssayMetadataTemplate.schema.json index 857ff394..5afc99dc 100644 --- a/model_json_schema/ark.SnATAC-seqAssayMetadataTemplate.schema.json +++ b/model_json_schema/ark.SnATAC-seqAssayMetadataTemplate.schema.json @@ -1,345 +1,353 @@ { + "$id": "http://example.com/SnATAC-seqAssayMetadataTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", - "properties": { - "assay": { - "type": "array", - "items": { - "enum": [ - "kiloplex", - "flow cytometry", - "RNASeq", - "GenePS SeqFISH", - "Olink Target 48", - "SNP array", - "Olink Flex", - "LC-MS/MS", - "CosMX", - "serial IHC", - "snRNASeq", - "imaging mass spectrometry", - "feature barcode sequencing", - "Olink Explore HT", - "H&E", - "Visium", - "WGS", - "scVDJSeq", - "Olink Target 96", - "NULISA", - "CE-MS", - "CITESeq", - "ASAPSeq", - "SomaScan", - "CyTOF", - "Olink Reveal", - "multiplexed ELISA", - "Xenium", - "Olink Focus", - "imaging mass cytometry", - "WES", - "scRNASeq", - "snATACSeq", - "VDJSeq" - ] - }, - "maxItems": 34 - }, - "nucleicAcidSource": { - "type": "array", - "items": { - "enum": [ - "CRISPR protospacer feature barcode", - "globin-depleted RNA", - "Tn5-accessible gDNA", - "intracellular protein feature barcode", - "BCR mRNA", - "poly(A) RNA", - "surface protein feature barcode", - "gDNA", - "rRNA-depleted RNA", - "multiplexing oligo", - "TCR mRNA", - "antigen capture barcode" - ] - }, - "maxItems": 12 - 
}, - "specimenModality": { - "enum": [ - "multispecimen", - "unknown", - "single specimen" - ] - }, - "dataCollectionBatch": {}, - "softwareAndVersion": { - "enum": [ - "Cell Ranger v8.0.0", - "Cell Ranger 9.0.1", - "Cell Ranger ATAC v1.1.0", - "Cell Ranger v3.1.0", - "Space Ranger 3.1.2", - "Cell Ranger v5.0.0", - "Cell Ranger v6.1.2", - "Cell Ranger v9.0.0", - "Cell Ranger v5.0.1", - "Cell Ranger v4.0.0", - "Cell Ranger v7.1.0", - "Space Ranger 3.0.0", - "Cell Ranger v6.1.0", - "Space Ranger 3.1.1", - "demuxlet", - "Cell Ranger v8.0.1", - "Cell Ranger v7.0.0", - "Space Ranger 3.0.1", - "Space Ranger 3.1.0", - "Cell Ranger v6.0.1", - "Cell Ranger v6.1.1", - "Cell Ranger v3.0.0", - "Cell Ranger v7.0.1", - "Cell Ranger v3.0.1", - "Cell Ranger v3.0.2", - "Space Ranger 3.1.3", - "BD FACSDiva 8.0.1", - "Cell Ranger v6.0.2", - "Cell Ranger v6.0.0", - "Cell Ranger v7.2.0", - "" - ] - }, - "alignmentReference": { - "enum": [ - "10x Cell Ranger Human GRCh38 2020-A", - "GRCh38", - "10x Cell Ranger Human GRCh38 2024-A", - "vdj_GRCh38_alts_ensembl-4.0.0", - "unknown", - "modified GRCh38" - ] - }, - "totalReads": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "percentCellViability": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "platform": { - "type": "array", - "items": { - "enum": [ - "BD FACSDiscover S8", - "Chromium Xo", - "Visium CytAssist", - "Illumina NovaSeq 6000", - "GEM-X Flex Gene Expression Chip", - "Cytek Aurora Evo", - "none", - "Chromium X", - "BD FACSAria III", - "Olink Signature Q100", - "Hyperion", - "Chromium Controller", - "Illumina HiSeq X Ten", - "Chromium Next GEM Chip G", - "Illumina NextSeq 500", - "Chromium Next GEM Chip H", - "BD FACSMelody", - "BD FACSDiscover A8", - "Not Applicable", - "Chromium Next GEM Chip K", - "Chromium GEM-X Single Cell 3' Chip v4", - "Cytek Aurora", - "BD FACSymphony S6", - "BD LSRFortessa", - "Sony MA900", - "Thermo Fisher Attune CytPix", - "Xenium", - "Helios Mass Cytometer", - "CyTOF XT", - 
"Fluidigm BioMark", - "Chromium Next GEM Chip M", - "Thermo Fisher Attune Xenith", - "Illumina HiSeq 2500", - "Chromium iX", - "Illumina NovaSeq X", - "BD FACSCanto II", - "BD FACSCanto", - "unknown", - "BD FACSLyric Clinical", - "BD FACSAria Fusion cell sorter", - "Chromium Next GEM Chip Q", - "GEM-X OCM 5' Chip", - "Thermo Fisher Attune NxT" - ] - }, - "maxItems": 43 - }, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "sequencingSaturation": {}, - "sampleProcessingBatch": {}, - "inputCellCount": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "libraryPrepMethod": { - "enum": [ - "Takara Human TCR profiling for Illumina", - "SMART-Seq Human TCR with UMI", - "10x Chromium GEM-X Single Cell 3' v4", - "Takara Human scTCR profiling for Illumina", - "SMART-Seq v4 Ultra Low Input RNA", - "10x Chromium Next GEM Single Cell 5' v2", - "10x Chromium Next GEM Single Cell 3' 3.1", - "Takara Human BCR profiling for Illumina", - "10x GEM-X Flex Gene Expression Human", - "QIAseq miRNA Library", - "10x Chromium Next GEM Single Cell 5' v1.1", - "Chromium Next GEM Single Cell ATAC v1.1", - "SMARTer Stranded Total RNA v2", - "Takara Human TCRv2 profiling for Illumina", - "10x Chromium Next GEM Single Cell 3'", - "NEBNext Human Immune Sequencing Kit", - "CEL-Seq2", - "10x Chromium Fixed RNA Human Transcriptome", - "10x Chromium Single Cell Human BCR", - "10x Chromium GEM-X Single Cell 5' v3", - "in-house library prep", - "custom DASH-treatment", - "Fluidigm C1 HT", - "TruSeq Stranded mRNA", - "NEBNext Ultra II Directional RNA Library", - "10x GEM-X Universal 5' Gene Expression v3", - "SMART-Seq Human BCR with UMI", - "Nextera XT DNA", - "10x Chromium Single Cell Human TCR", - "Nextera XT", - "10x Chromium Next GEM Single Cell ATAC v2" - ] - }, - "libraryID": {}, - "biospecimenID": {}, - "10xProbeSetReference": { - "enum": [ - "Flex Human Transcriptome Probe Set v1.0.1", - "Visium Human Transcriptome v1", - "Flex Human Transcriptome Probe Set 
v1.1.0", - "Visium Human Transcriptome v2", - "custom probe set", - "" - ] - } - }, - "required": [ - "assay", - "nucleicAcidSource", - "specimenModality", - "alignmentReference", - "totalReads", - "percentCellViability", - "platform", - "Component", - "inputCellCount", - "libraryPrepMethod" - ], "allOf": [ { "if": { "properties": { - "specimenModality": { + "LibraryPrepMethod": { "enum": [ - "multispecimen" + "10xGEM-XFlexGeneExpressionHuman" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "libraryID": { + "10xProbeSetReference": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "libraryID" + "10xProbeSetReference" ] } }, { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "single specimen" + "Multispecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "biospecimenID": { + "LibraryID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "biospecimenID" + "LibraryID" ] } }, { "if": { "properties": { - "libraryPrepMethod": { + "SpecimenModality": { "enum": [ - "10x GEM-X Flex Gene Expression Human" + "Singlespecimen" ] } - }, - "required": [ - "libraryPrepMethod" - ] + } }, "then": { "properties": { - "10xProbeSetReference": { - "enum": [ - "Flex Human Transcriptome Probe Set v1.0.1", - "Visium Human Transcriptome v1", - "Flex Human Transcriptome Probe Set v1.1.0", - "Visium Human Transcriptome v2", - "custom probe set" - ] + "BiospecimenID": { + "not": { + "type": "null" + } } }, "required": [ - "10xProbeSetReference" + "BiospecimenID" ] } } - ] + ], + "description": "A template outlining metadata to be collected for each library in a snATAC-seq dataset.", + "properties": { + "10xProbeSetReference": { + "description": "Name of probe set used in 10x Chromium Flex, Xenium, or Visium data. 
If custom modified probe set was used the probe reference should be included as metadata accompanying the experimental data files.", + "enum": [ + "Flex Human Transcriptome Probe Set v1.0.1", + "Flex Human Transcriptome Probe Set v1.1.0", + "Visium Human Transcriptome v1", + "Visium Human Transcriptome v2", + "custom probe set" + ], + "title": "10xProbeSetReference" + }, + "AlignmentReference": { + "description": "The genomic/transcriptomic reference used for performing read alignment against.", + "enum": [ + "10x Cell Ranger Human GRCh38 2020-A", + "10x Cell Ranger Human GRCh38 2024-A", + "GRCh38", + "modified GRCh38", + "unknown", + "vdj_GRCh38_alts_ensembl-4.0.0" + ], + "title": "alignmentReference" + }, + "Assay": { + "description": "The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.", + "enum": [ + "ASAPSeq", + "CE-MS", + "CITESeq", + "CosMX", + "CyTOF", + "GenePS SeqFISH", + "H&E", + "LC-MS/MS", + "NULISA", + "Olink Explore HT", + "Olink Flex", + "Olink Focus", + "Olink Reveal", + "Olink Target 48", + "Olink Target 96", + "RNASeq", + "SNP array", + "SomaScan", + "VDJSeq", + "Visium", + "WES", + "WGS", + "Xenium", + "feature barcode sequencing", + "flow cytometry", + "imaging mass cytometry", + "imaging mass spectrometry", + "kiloplex", + "multiplexed ELISA", + "scRNASeq", + "scVDJSeq", + "serial IHC", + "snATACSeq", + "snRNASeq" + ], + "title": "assay" + }, + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. 
For multi-specimen data files provide all IDs in a comma-separated list.", + "title": "biospecimenID", + "type": "string" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "DataCollectionBatch": { + "description": "A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.", + "title": "dataCollectionBatch", + "type": "string" + }, + "InputCellCount": { + "description": "An estimate of the number of cells expected to be sequenced in a library. Software that process single-cell sequencing data often include options for users to specify this value to improve processing results.", + "title": "inputCellCount", + "type": "integer" + }, + "LibraryID": { + "description": "A library label or name, unique within an experiment, used to distinguish sequencing libraries.", + "title": "libraryID", + "type": "string" + }, + "LibraryPrepMethod": { + "description": "Sequencing library preparation method or kit used to create the library. 
If no commercially available kit was used, please select 'in-house library prep'.", + "enum": [ + "10x Chromium Fixed RNA Human Transcriptome", + "10x Chromium GEM-X Single Cell 3' v4", + "10x Chromium GEM-X Single Cell 5' v3", + "10x Chromium Next GEM Single Cell 3'", + "10x Chromium Next GEM Single Cell 3' 3.1", + "10x Chromium Next GEM Single Cell 5' v1.1", + "10x Chromium Next GEM Single Cell 5' v2", + "10x Chromium Next GEM Single Cell ATAC v2", + "10x Chromium Single Cell Human BCR", + "10x Chromium Single Cell Human TCR", + "10x GEM-X Flex Gene Expression Human", + "10x GEM-X Universal 5' Gene Expression v3", + "CEL-Seq2", + "Chromium Next GEM Single Cell ATAC v1.1", + "Fluidigm C1 HT", + "NEBNext Human Immune Sequencing Kit", + "NEBNext Ultra II Directional RNA Library", + "Nextera XT", + "Nextera XT DNA", + "QIAseq miRNA Library", + "SMART-Seq Human BCR with UMI", + "SMART-Seq Human TCR with UMI", + "SMART-Seq v4 Ultra Low Input RNA", + "SMARTer Stranded Total RNA v2", + "Takara Human BCR profiling for Illumina", + "Takara Human TCR profiling for Illumina", + "Takara Human TCRv2 profiling for Illumina", + "Takara Human scTCR profiling for Illumina", + "TruSeq Stranded mRNA", + "custom DASH-treatment", + "in-house library prep" + ], + "title": "libraryPrepMethod" + }, + "NucleicAcidSource": { + "description": "The source of the nucleic acid used as input for sequencing library fragments. Select all that apply, though in most cases only a single label is expected.", + "enum": [ + "BCR mRNA", + "CRISPR protospacer feature barcode", + "TCR mRNA", + "Tn5-accessible gDNA", + "antigen capture barcode", + "gDNA", + "globin-depleted RNA", + "intracellular protein feature barcode", + "multiplexing oligo", + "poly(A) RNA", + "rRNA-depleted RNA", + "surface protein feature barcode" + ], + "title": "nucleicAcidSource" + }, + "PercentCellViability": { + "description": "A measure of the proportion of viable cells within a cell suspension. 
Scale is 0-100.", + "maximum": 100.0, + "minimum": 50.0, + "title": "percentCellViability", + "type": "integer" + }, + "Platform": { + "description": "The specific version (manufacturer, model, etc.) of a technology that is used to carry out a laboratory or computational experiment. Specify where applicable for experimental data files, else enter 'none'. In most cases only a single label is expected, however multiple selections can be provided in comma-delimited list where applicable e.g., for 10x Genomics fastq files please specify both the 10x instrument and the sequencing platform.", + "enum": [ + "BD FACSAria Fusion cell sorter", + "BD FACSAria III", + "BD FACSCanto", + "BD FACSCanto II", + "BD FACSDiscover A8", + "BD FACSDiscover S8", + "BD FACSLyric Clinical", + "BD FACSMelody", + "BD FACSymphony S6", + "BD LSRFortessa", + "Chromium Controller", + "Chromium GEM-X Single Cell 3' Chip v4", + "Chromium Next GEM Chip G", + "Chromium Next GEM Chip H", + "Chromium Next GEM Chip K", + "Chromium Next GEM Chip M", + "Chromium Next GEM Chip Q", + "Chromium X", + "Chromium Xo", + "Chromium iX", + "CyTOF XT", + "Cytek Aurora", + "Cytek Aurora Evo", + "Fluidigm BioMark", + "GEM-X Flex Gene Expression Chip", + "GEM-X OCM 5' Chip", + "Helios Mass Cytometer", + "Hyperion", + "Illumina HiSeq 2500", + "Illumina HiSeq X Ten", + "Illumina NextSeq 500", + "Illumina NovaSeq 6000", + "Illumina NovaSeq X", + "Not Applicable", + "Olink Signature Q100", + "Sony MA900", + "Thermo Fisher Attune CytPix", + "Thermo Fisher Attune NxT", + "Thermo Fisher Attune Xenith", + "Visium CytAssist", + "Xenium", + "none", + "unknown" + ], + "title": "platform" + }, + "SampleProcessingBatch": { + "description": "A label indicating batching of sample processing or preparation that occurs prior to data collection.", + "title": "sampleProcessingBatch", + "type": "string" + }, + "SequencingSaturation": { + "description": "A measure of the fraction of library complexity that was sequenced in a library. 
This metric quantifies the fraction of reads originating from an already-observed UMI. More specifically, this is the fraction of confidently mapped, valid cell-barcode, valid UMI reads that are non-unique. Scale is 0-1.", + "maximum": 1.0, + "minimum": 0.0, + "title": "sequencingSaturation", + "type": "number" + }, + "SoftwareAndVersion": { + "description": "Relevant software and version used to generate the data file.", + "enum": [ + "BD FACSDiva 8.0.1", + "Cell Ranger 9.0.1", + "Cell Ranger ATAC v1.1.0", + "Cell Ranger v3.0.0", + "Cell Ranger v3.0.1", + "Cell Ranger v3.0.2", + "Cell Ranger v3.1.0", + "Cell Ranger v4.0.0", + "Cell Ranger v5.0.0", + "Cell Ranger v5.0.1", + "Cell Ranger v6.0.0", + "Cell Ranger v6.0.1", + "Cell Ranger v6.0.2", + "Cell Ranger v6.1.0", + "Cell Ranger v6.1.1", + "Cell Ranger v6.1.2", + "Cell Ranger v7.0.0", + "Cell Ranger v7.0.1", + "Cell Ranger v7.1.0", + "Cell Ranger v7.2.0", + "Cell Ranger v8.0.0", + "Cell Ranger v8.0.1", + "Cell Ranger v9.0.0", + "Space Ranger 3.0.0", + "Space Ranger 3.0.1", + "Space Ranger 3.1.0", + "Space Ranger 3.1.1", + "Space Ranger 3.1.2", + "Space Ranger 3.1.3", + "demuxlet" + ], + "title": "softwareAndVersion" + }, + "SpecimenModality": { + "description": "Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens", + "enum": [ + "multispecimen", + "single specimen", + "unknown" + ], + "title": "specimenModality" + }, + "TotalReads": { + "description": "Total number of reads sequenced from the library.", + "title": "totalReads", + "type": "integer" + } + }, + "required": [ + "AlignmentReference", + "Assay", + "Component", + "InputCellCount", + "LibraryPrepMethod", + "NucleicAcidSource", + "PercentCellViability", + "Platform", + "SpecimenModality", + "TotalReads" + ], + "title": "SnATAC-seqAssayMetadataTemplate", + "type": "object" } \ No newline at end of file diff --git 
a/model_json_schema/ark.SnATACSeqProcessedDataAnnotationTemplate.schema.json b/model_json_schema/ark.SnATACSeqProcessedDataAnnotationTemplate.schema.json index c956df17..928da445 100644 --- a/model_json_schema/ark.SnATACSeqProcessedDataAnnotationTemplate.schema.json +++ b/model_json_schema/ark.SnATACSeqProcessedDataAnnotationTemplate.schema.json @@ -1,387 +1,370 @@ { + "$id": "http://example.com/SnATACSeqProcessedDataAnnotationTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", - "properties": { - "assay": { - "type": "array", - "items": { - "enum": [ - "CosMX", - "Olink Flex", - "CyTOF", - "snRNASeq", - "kiloplex", - "Olink Target 48", - "serial IHC", - "Xenium", - "Olink Focus", - "CITESeq", - "flow cytometry", - "NULISA", - "scVDJSeq", - "Visium", - "Olink Target 96", - "ASAPSeq", - "scRNASeq", - "RNASeq", - "snATACSeq", - "feature barcode sequencing", - "CE-MS", - "WES", - "SNP array", - "multiplexed ELISA", - "VDJSeq", - "imaging mass cytometry", - "H&E", - "WGS", - "LC-MS/MS", - "imaging mass spectrometry", - "Olink Explore HT", - "Olink Reveal", - "SomaScan", - "GenePS SeqFISH" - ] - }, - "maxItems": 34 - }, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "resourceType": { - "type": "array", - "items": { - "enum": [ - "metadata", - "experimental data" - ] - }, - "maxItems": 2 - }, - "specimenModality": { - "enum": [ - "unknown", - "multispecimen", - "single specimen" - ] - }, - "fileFormat": { - "enum": [ - "xls", - "Rds", - "bim", - "h5", - "h5ad", - "bai", - "zip", - "fam", - "tsv", - "csv", - "xlsx", - "bam", - "tgz", - "bed", - "mtx", - "txt" - ] - }, - "dataLevel": { - "enum": [ - "1", - "3", - "4", - "2", - "5" - ] - }, - "cellRangerOutput": { - "enum": [ - "filtered MEX", - "filtered_feature_bc_matrix", - "raw_feature_bc_matrix", - "Not Applicable", - "filtered_peak_bc_matrix", - "raw MEX", - "raw_peak_bc_matrix" - ] - }, - 
"targetPanelSize": {}, - "targetPanelSynID": {}, - "targetPanel": {}, - "metadataType": { - "enum": [ - "single-cell metadata", - "" - ] - }, - "processedDataType": { - "type": "array", - "items": { - "enum": [ - "gene counts", - "epigenomic peaks", - "differential expression results", - "barcode counts", - "" - ] - }, - "maxItems": 4 - }, - "individualID": {}, - "biospecimenID": {}, - "RObjectClass": { - "enum": [ - "vector", - "Symphony reference", - "Seurat object", - "list", - "sparse matrix", - "SummarizedExperiment", - "data.frame", - "matrix", - "ROCR prediction.object", - "" - ] - } - }, - "required": [ - "assay", - "Component", - "resourceType", - "specimenModality", - "fileFormat", - "dataLevel", - "cellRangerOutput" - ], "allOf": [ { "if": { "properties": { - "assay": { + "Assay": { "enum": [ - "feature barcode sequencing" + "Featurebarcodesequencing" ] } - }, - "required": [ - "assay" - ] + } }, "then": { "properties": { - "targetPanelSize": {} + "TargetPanel": { + "not": { + "type": "null" + } + } }, "required": [ - "targetPanelSize" + "TargetPanel" ] } }, { "if": { "properties": { - "assay": { + "Assay": { "enum": [ - "feature barcode sequencing" + "Featurebarcodesequencing" ] } - }, - "required": [ - "assay" - ] + } }, "then": { "properties": { - "targetPanelSynID": { + "TargetPanelSize": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "targetPanelSynID" + "TargetPanelSize" ] } }, { "if": { "properties": { - "assay": { + "Assay": { "enum": [ - "feature barcode sequencing" + "Featurebarcodesequencing" ] } - }, - "required": [ - "assay" - ] + } }, "then": { "properties": { - "targetPanel": { + "TargetPanelSynID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "targetPanel" + "TargetPanelSynID" ] } }, { "if": { "properties": { - "resourceType": { + "FileFormat": { "enum": [ - "metadata" + "Rds" ] } - }, - "required": [ - "resourceType" - ] + } }, "then": { "properties": { - "metadataType": { - "enum": [ 
- "single-cell metadata" - ] + "RObjectClass": { + "not": { + "type": "null" + } } }, "required": [ - "metadataType" + "RObjectClass" ] } }, { "if": { "properties": { - "resourceType": { + "ResourceType": { "enum": [ - "experimental data" + "Experimentaldata" ] } - }, - "required": [ - "resourceType" - ] + } }, "then": { "properties": { - "processedDataType": { - "type": "array", - "items": { - "enum": [ - "gene counts", - "epigenomic peaks", - "differential expression results", - "barcode counts", - "" - ] - }, - "maxItems": 4 + "ProcessedDataType": { + "not": { + "type": "null" + } } }, "required": [ - "processedDataType" + "ProcessedDataType" ] } }, { "if": { "properties": { - "specimenModality": { + "ResourceType": { "enum": [ - "single specimen" + "Metadata" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "individualID": { + "MetadataType": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "individualID" + "MetadataType" ] } }, { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "single specimen" + "Singlespecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "biospecimenID": { + "BiospecimenID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "biospecimenID" + "BiospecimenID" ] } }, { "if": { "properties": { - "fileFormat": { + "SpecimenModality": { "enum": [ - "Rds" + "Singlespecimen" ] } - }, - "required": [ - "fileFormat" - ] + } }, "then": { "properties": { - "RObjectClass": { - "enum": [ - "vector", - "Symphony reference", - "Seurat object", - "list", - "sparse matrix", - "SummarizedExperiment", - "data.frame", - "matrix", - "ROCR prediction.object", - "" - ] + "IndividualID": { + "not": { + "type": "null" + } } }, "required": [ - "RObjectClass" + "IndividualID" ] } } - ] + ], + "description": "A data contributor template outlining metadata to be collected as file annotations for snATAC-seq 
processed data files (i.e., anything not a fastq file).", + "properties": { + "Assay": { + "description": "The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.", + "enum": [ + "ASAPSeq", + "CE-MS", + "CITESeq", + "CosMX", + "CyTOF", + "GenePS SeqFISH", + "H&E", + "LC-MS/MS", + "NULISA", + "Olink Explore HT", + "Olink Flex", + "Olink Focus", + "Olink Reveal", + "Olink Target 48", + "Olink Target 96", + "RNASeq", + "SNP array", + "SomaScan", + "VDJSeq", + "Visium", + "WES", + "WGS", + "Xenium", + "feature barcode sequencing", + "flow cytometry", + "imaging mass cytometry", + "imaging mass spectrometry", + "kiloplex", + "multiplexed ELISA", + "scRNASeq", + "scVDJSeq", + "serial IHC", + "snATACSeq", + "snRNASeq" + ], + "title": "assay" + }, + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.", + "title": "biospecimenID", + "type": "string" + }, + "CellRangerOutput": { + "description": "10x Genomics Cell Ranger software output several different counts results and formats, some with different processing applied. This label distinguishes between these types and is particularly helpful when multiple files are uploaded with the sample name, e.g., barcodes.tsv.gz", + "enum": [ + "Not Applicable", + "filtered MEX", + "filtered_feature_bc_matrix", + "filtered_peak_bc_matrix", + "raw MEX", + "raw_feature_bc_matrix", + "raw_peak_bc_matrix" + ], + "title": "cellRangerOutput" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "DataLevel": { + "description": "Level of data processing applied to file. 
Levels refer to pre-defined standards of processing for the given assay.", + "enum": [ + "1", + "2", + "3", + "4", + "5" + ], + "title": "dataLevel" + }, + "FileFormat": { + "description": "Standard file format name or file extension", + "enum": [ + "Rds", + "bai", + "bam", + "bed", + "bim", + "csv", + "fam", + "h5", + "h5ad", + "mtx", + "tgz", + "tsv", + "txt", + "xls", + "xlsx", + "zip" + ], + "title": "fileFormat" + }, + "IndividualID": { + "description": "Unique identifier assigned to each study participant. For multi-specimen data files provide all IDs in a comma-separated list.", + "title": "individualID", + "type": "string" + }, + "MetadataType": { + "description": "A label further classifying the content of metadata resource.", + "enum": [ + "single-cell metadata" + ], + "title": "metadataType" + }, + "ProcessedDataType": { + "description": "A label used for file annotations to provide a brief description of the processed data file.", + "enum": [ + "barcode counts", + "differential expression results", + "epigenomic peaks", + "gene counts" + ], + "title": "processedDataType" + }, + "RObjectClass": { + "description": "Rds files store R objects, one per file. 
This label details the class of the R object saved to the Rds file or other similar file types.", + "enum": [ + "ROCR prediction.object", + "Seurat object", + "SummarizedExperiment", + "Symphony reference", + "data.frame", + "list", + "matrix", + "sparse matrix", + "vector" + ], + "title": "RObjectClass" + }, + "ResourceType": { + "description": "High-level classification of the file content", + "enum": [ + "experimental data", + "metadata" + ], + "title": "resourceType" + }, + "SpecimenModality": { + "description": "Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens", + "enum": [ + "multispecimen", + "single specimen", + "unknown" + ], + "title": "specimenModality" + }, + "TargetPanel": { + "description": "A unique or established human-readable name assigned to the panel of targets profiled in the experiment. For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.", + "title": "targetPanel", + "type": "string" + }, + "TargetPanelSize": { + "description": "The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).", + "title": "targetPanelSize", + "type": "integer" + }, + "TargetPanelSynID": { + "description": "In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. 
This attribute links experimental data files to the target panel metadata via the synapse ID of that file.", + "pattern": "^syn[0-9]{8}", + "title": "targetPanelSynID", + "type": "string" + } + }, + "required": [ + "Assay", + "CellRangerOutput", + "Component", + "DataLevel", + "FileFormat", + "ResourceType", + "SpecimenModality" + ], + "title": "SnATACSeqProcessedDataAnnotationTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.SnRNASeqAssayMetadataTemplate.schema.json b/model_json_schema/ark.SnRNASeqAssayMetadataTemplate.schema.json index 95dfac5a..c4254dfd 100644 --- a/model_json_schema/ark.SnRNASeqAssayMetadataTemplate.schema.json +++ b/model_json_schema/ark.SnRNASeqAssayMetadataTemplate.schema.json @@ -1,285 +1,23 @@ { + "$id": "http://example.com/SnRNASeqAssayMetadataTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", - "properties": { - "dataCollectionBatch": {}, - "percentCellViability": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "softwareAndVersion": { - "enum": [ - "Cell Ranger v9.0.0", - "Cell Ranger v6.0.1", - "Cell Ranger v7.1.0", - "Cell Ranger ATAC v1.1.0", - "Cell Ranger v6.0.2", - "Space Ranger 3.1.1", - "Cell Ranger v6.1.1", - "Cell Ranger v7.0.1", - "Cell Ranger v4.0.0", - "Cell Ranger v3.0.1", - "Space Ranger 3.1.3", - "Cell Ranger v7.0.0", - "Cell Ranger v5.0.0", - "Cell Ranger v5.0.1", - "Cell Ranger v3.0.0", - "Cell Ranger v8.0.1", - "Cell Ranger v7.2.0", - "Cell Ranger v3.0.2", - "Cell Ranger 9.0.1", - "Cell Ranger v8.0.0", - "Cell Ranger v6.1.0", - "Cell Ranger v3.1.0", - "Cell Ranger v6.1.2", - "Cell Ranger v6.0.0", - "Space Ranger 3.0.0", - "Space Ranger 3.1.0", - "Space Ranger 3.1.2", - "Space Ranger 3.0.1", - "demuxlet", - "BD FACSDiva 8.0.1", - "" - ] - }, - "alignmentReference": { - "enum": [ - "10x Cell Ranger Human GRCh38 2020-A", - "GRCh38", - "modified GRCh38", - "10x Cell Ranger Human 
GRCh38 2024-A", - "vdj_GRCh38_alts_ensembl-4.0.0", - "unknown" - ] - }, - "nucleicAcidSource": { - "type": "array", - "items": { - "enum": [ - "globin-depleted RNA", - "CRISPR protospacer feature barcode", - "poly(A) RNA", - "gDNA", - "antigen capture barcode", - "multiplexing oligo", - "TCR mRNA", - "BCR mRNA", - "surface protein feature barcode", - "rRNA-depleted RNA", - "Tn5-accessible gDNA", - "intracellular protein feature barcode" - ] - }, - "maxItems": 12 - }, - "sampleProcessingBatch": {}, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "platform": { - "type": "array", - "items": { - "enum": [ - "BD FACSAria III", - "Illumina NovaSeq X", - "BD FACSDiscover A8", - "Thermo Fisher Attune NxT", - "Chromium GEM-X Single Cell 3' Chip v4", - "GEM-X Flex Gene Expression Chip", - "BD FACSDiscover S8", - "GEM-X OCM 5' Chip", - "BD FACSAria Fusion cell sorter", - "Thermo Fisher Attune Xenith", - "Not Applicable", - "Olink Signature Q100", - "Cytek Aurora Evo", - "Chromium Next GEM Chip Q", - "Chromium X", - "Chromium Xo", - "BD LSRFortessa", - "CyTOF XT", - "Chromium iX", - "BD FACSCanto", - "Visium CytAssist", - "Chromium Next GEM Chip M", - "BD FACSLyric Clinical", - "Chromium Next GEM Chip H", - "Chromium Controller", - "Helios Mass Cytometer", - "BD FACSMelody", - "Fluidigm BioMark", - "Xenium", - "none", - "Illumina NovaSeq 6000", - "Illumina NextSeq 500", - "Sony MA900", - "Hyperion", - "Chromium Next GEM Chip G", - "Thermo Fisher Attune CytPix", - "BD FACSCanto II", - "Illumina HiSeq 2500", - "Illumina HiSeq X Ten", - "BD FACSymphony S6", - "unknown", - "Cytek Aurora", - "Chromium Next GEM Chip K" - ] - }, - "maxItems": 43 - }, - "assay": { - "type": "array", - "items": { - "enum": [ - "imaging mass spectrometry", - "imaging mass cytometry", - "snRNASeq", - "Olink Target 96", - "Olink Explore HT", - "RNASeq", - "Visium", - "scVDJSeq", - "multiplexed ELISA", - "GenePS SeqFISH", - "Olink Target 48", - "SomaScan", - "CITESeq", - 
"ASAPSeq", - "VDJSeq", - "flow cytometry", - "Olink Flex", - "serial IHC", - "WES", - "WGS", - "LC-MS/MS", - "Olink Reveal", - "Olink Focus", - "H&E", - "Xenium", - "kiloplex", - "SNP array", - "CE-MS", - "feature barcode sequencing", - "NULISA", - "CyTOF", - "CosMX", - "scRNASeq", - "snATACSeq" - ] - }, - "maxItems": 34 - }, - "inputCellCount": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "sequencingSaturation": {}, - "totalReads": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "libraryPrepMethod": { - "enum": [ - "Nextera XT DNA", - "Takara Human scTCR profiling for Illumina", - "10x GEM-X Flex Gene Expression Human", - "SMARTer Stranded Total RNA v2", - "Takara Human TCR profiling for Illumina", - "SMART-Seq Human TCR with UMI", - "Chromium Next GEM Single Cell ATAC v1.1", - "10x Chromium Fixed RNA Human Transcriptome", - "10x GEM-X Universal 5' Gene Expression v3", - "10x Chromium GEM-X Single Cell 5' v3", - "10x Chromium Next GEM Single Cell 3' 3.1", - "NEBNext Ultra II Directional RNA Library", - "SMART-Seq v4 Ultra Low Input RNA", - "SMART-Seq Human BCR with UMI", - "Takara Human BCR profiling for Illumina", - "QIAseq miRNA Library", - "10x Chromium Single Cell Human TCR", - "CEL-Seq2", - "NEBNext Human Immune Sequencing Kit", - "10x Chromium Next GEM Single Cell 5' v1.1", - "10x Chromium Next GEM Single Cell 5' v2", - "Fluidigm C1 HT", - "Takara Human TCRv2 profiling for Illumina", - "Nextera XT", - "10x Chromium Next GEM Single Cell ATAC v2", - "10x Chromium GEM-X Single Cell 3' v4", - "TruSeq Stranded mRNA", - "10x Chromium Single Cell Human BCR", - "10x Chromium Next GEM Single Cell 3'", - "custom DASH-treatment", - "in-house library prep" - ] - }, - "specimenModality": { - "enum": [ - "multispecimen", - "unknown", - "single specimen" - ] - }, - "10xProbeSetReference": { - "enum": [ - "custom probe set", - "Flex Human Transcriptome Probe Set v1.0.1", - "Flex Human Transcriptome Probe Set v1.1.0", - "Visium Human Transcriptome 
v1", - "Visium Human Transcriptome v2", - "" - ] - }, - "libraryID": {}, - "biospecimenID": {} - }, - "required": [ - "percentCellViability", - "alignmentReference", - "nucleicAcidSource", - "Component", - "platform", - "assay", - "inputCellCount", - "totalReads", - "libraryPrepMethod", - "specimenModality" - ], "allOf": [ { "if": { "properties": { - "libraryPrepMethod": { + "LibraryPrepMethod": { "enum": [ - "10x GEM-X Flex Gene Expression Human" + "10xGEM-XFlexGeneExpressionHuman" ] } - }, - "required": [ - "libraryPrepMethod" - ] + } }, "then": { "properties": { "10xProbeSetReference": { - "enum": [ - "custom probe set", - "Flex Human Transcriptome Probe Set v1.0.1", - "Flex Human Transcriptome Probe Set v1.1.0", - "Visium Human Transcriptome v1", - "Visium Human Transcriptome v2" - ] + "not": { + "type": "null" + } } }, "required": [ @@ -290,56 +28,326 @@ { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "multispecimen" + "Multispecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "libraryID": { + "LibraryID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "libraryID" + "LibraryID" ] } }, { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "single specimen" + "Singlespecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "biospecimenID": { + "BiospecimenID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "biospecimenID" + "BiospecimenID" ] } } - ] + ], + "description": "A template outlining metadata to be collected for each library in a snRNA-seq dataset.", + "properties": { + "10xProbeSetReference": { + "description": "Name of probe set used in 10x Chromium Flex, Xenium, or Visium data. 
If custom modified probe set was used the probe reference should be included as metadata accompanying the experimental data files.", + "enum": [ + "Flex Human Transcriptome Probe Set v1.0.1", + "Flex Human Transcriptome Probe Set v1.1.0", + "Visium Human Transcriptome v1", + "Visium Human Transcriptome v2", + "custom probe set" + ], + "title": "10xProbeSetReference" + }, + "AlignmentReference": { + "description": "The genomic/transcriptomic reference used for performing read alignment against.", + "enum": [ + "10x Cell Ranger Human GRCh38 2020-A", + "10x Cell Ranger Human GRCh38 2024-A", + "GRCh38", + "modified GRCh38", + "unknown", + "vdj_GRCh38_alts_ensembl-4.0.0" + ], + "title": "alignmentReference" + }, + "Assay": { + "description": "The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.", + "enum": [ + "ASAPSeq", + "CE-MS", + "CITESeq", + "CosMX", + "CyTOF", + "GenePS SeqFISH", + "H&E", + "LC-MS/MS", + "NULISA", + "Olink Explore HT", + "Olink Flex", + "Olink Focus", + "Olink Reveal", + "Olink Target 48", + "Olink Target 96", + "RNASeq", + "SNP array", + "SomaScan", + "VDJSeq", + "Visium", + "WES", + "WGS", + "Xenium", + "feature barcode sequencing", + "flow cytometry", + "imaging mass cytometry", + "imaging mass spectrometry", + "kiloplex", + "multiplexed ELISA", + "scRNASeq", + "scVDJSeq", + "serial IHC", + "snATACSeq", + "snRNASeq" + ], + "title": "assay" + }, + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. 
For multi-specimen data files provide all IDs in a comma-separated list.", + "title": "biospecimenID", + "type": "string" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "DataCollectionBatch": { + "description": "A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.", + "title": "dataCollectionBatch", + "type": "string" + }, + "InputCellCount": { + "description": "An estimate of the number of cells expected to be sequenced in a library. Software that process single-cell sequencing data often include options for users to specify this value to improve processing results.", + "title": "inputCellCount", + "type": "integer" + }, + "LibraryID": { + "description": "A library label or name, unique within an experiment, used to distinguish sequencing libraries.", + "title": "libraryID", + "type": "string" + }, + "LibraryPrepMethod": { + "description": "Sequencing library preparation method or kit used to create the library. 
If no commercially available kit was used, please select 'in-house library prep'.", + "enum": [ + "10x Chromium Fixed RNA Human Transcriptome", + "10x Chromium GEM-X Single Cell 3' v4", + "10x Chromium GEM-X Single Cell 5' v3", + "10x Chromium Next GEM Single Cell 3'", + "10x Chromium Next GEM Single Cell 3' 3.1", + "10x Chromium Next GEM Single Cell 5' v1.1", + "10x Chromium Next GEM Single Cell 5' v2", + "10x Chromium Next GEM Single Cell ATAC v2", + "10x Chromium Single Cell Human BCR", + "10x Chromium Single Cell Human TCR", + "10x GEM-X Flex Gene Expression Human", + "10x GEM-X Universal 5' Gene Expression v3", + "CEL-Seq2", + "Chromium Next GEM Single Cell ATAC v1.1", + "Fluidigm C1 HT", + "NEBNext Human Immune Sequencing Kit", + "NEBNext Ultra II Directional RNA Library", + "Nextera XT", + "Nextera XT DNA", + "QIAseq miRNA Library", + "SMART-Seq Human BCR with UMI", + "SMART-Seq Human TCR with UMI", + "SMART-Seq v4 Ultra Low Input RNA", + "SMARTer Stranded Total RNA v2", + "Takara Human BCR profiling for Illumina", + "Takara Human TCR profiling for Illumina", + "Takara Human TCRv2 profiling for Illumina", + "Takara Human scTCR profiling for Illumina", + "TruSeq Stranded mRNA", + "custom DASH-treatment", + "in-house library prep" + ], + "title": "libraryPrepMethod" + }, + "NucleicAcidSource": { + "description": "The source of the nucleic acid used as input for sequencing library fragments. Select all that apply, though in most cases only a single label is expected.", + "enum": [ + "BCR mRNA", + "CRISPR protospacer feature barcode", + "TCR mRNA", + "Tn5-accessible gDNA", + "antigen capture barcode", + "gDNA", + "globin-depleted RNA", + "intracellular protein feature barcode", + "multiplexing oligo", + "poly(A) RNA", + "rRNA-depleted RNA", + "surface protein feature barcode" + ], + "title": "nucleicAcidSource" + }, + "PercentCellViability": { + "description": "A measure of the proportion of viable cells within a cell suspension. 
Scale is 0-100.", + "maximum": 100.0, + "minimum": 50.0, + "title": "percentCellViability", + "type": "integer" + }, + "Platform": { + "description": "The specific version (manufacturer, model, etc.) of a technology that is used to carry out a laboratory or computational experiment. Specify where applicable for experimental data files, else enter 'none'. In most cases only a single label is expected, however multiple selections can be provided in comma-delimited list where applicable e.g., for 10x Genomics fastq files please specify both the 10x instrument and the sequencing platform.", + "enum": [ + "BD FACSAria Fusion cell sorter", + "BD FACSAria III", + "BD FACSCanto", + "BD FACSCanto II", + "BD FACSDiscover A8", + "BD FACSDiscover S8", + "BD FACSLyric Clinical", + "BD FACSMelody", + "BD FACSymphony S6", + "BD LSRFortessa", + "Chromium Controller", + "Chromium GEM-X Single Cell 3' Chip v4", + "Chromium Next GEM Chip G", + "Chromium Next GEM Chip H", + "Chromium Next GEM Chip K", + "Chromium Next GEM Chip M", + "Chromium Next GEM Chip Q", + "Chromium X", + "Chromium Xo", + "Chromium iX", + "CyTOF XT", + "Cytek Aurora", + "Cytek Aurora Evo", + "Fluidigm BioMark", + "GEM-X Flex Gene Expression Chip", + "GEM-X OCM 5' Chip", + "Helios Mass Cytometer", + "Hyperion", + "Illumina HiSeq 2500", + "Illumina HiSeq X Ten", + "Illumina NextSeq 500", + "Illumina NovaSeq 6000", + "Illumina NovaSeq X", + "Not Applicable", + "Olink Signature Q100", + "Sony MA900", + "Thermo Fisher Attune CytPix", + "Thermo Fisher Attune NxT", + "Thermo Fisher Attune Xenith", + "Visium CytAssist", + "Xenium", + "none", + "unknown" + ], + "title": "platform" + }, + "SampleProcessingBatch": { + "description": "A label indicating batching of sample processing or preparation that occurs prior to data collection.", + "title": "sampleProcessingBatch", + "type": "string" + }, + "SequencingSaturation": { + "description": "A measure of the fraction of library complexity that was sequenced in a library. 
This metric quantifies the fraction of reads originating from an already-observed UMI. More specifically, this is the fraction of confidently mapped, valid cell-barcode, valid UMI reads that are non-unique. Scale is 0-1.", + "maximum": 1.0, + "minimum": 0.0, + "title": "sequencingSaturation", + "type": "number" + }, + "SoftwareAndVersion": { + "description": "Relevant software and version used to generate the data file.", + "enum": [ + "BD FACSDiva 8.0.1", + "Cell Ranger 9.0.1", + "Cell Ranger ATAC v1.1.0", + "Cell Ranger v3.0.0", + "Cell Ranger v3.0.1", + "Cell Ranger v3.0.2", + "Cell Ranger v3.1.0", + "Cell Ranger v4.0.0", + "Cell Ranger v5.0.0", + "Cell Ranger v5.0.1", + "Cell Ranger v6.0.0", + "Cell Ranger v6.0.1", + "Cell Ranger v6.0.2", + "Cell Ranger v6.1.0", + "Cell Ranger v6.1.1", + "Cell Ranger v6.1.2", + "Cell Ranger v7.0.0", + "Cell Ranger v7.0.1", + "Cell Ranger v7.1.0", + "Cell Ranger v7.2.0", + "Cell Ranger v8.0.0", + "Cell Ranger v8.0.1", + "Cell Ranger v9.0.0", + "Space Ranger 3.0.0", + "Space Ranger 3.0.1", + "Space Ranger 3.1.0", + "Space Ranger 3.1.1", + "Space Ranger 3.1.2", + "Space Ranger 3.1.3", + "demuxlet" + ], + "title": "softwareAndVersion" + }, + "SpecimenModality": { + "description": "Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens", + "enum": [ + "multispecimen", + "single specimen", + "unknown" + ], + "title": "specimenModality" + }, + "TotalReads": { + "description": "Total number of reads sequenced from the library.", + "title": "totalReads", + "type": "integer" + } + }, + "required": [ + "AlignmentReference", + "Assay", + "Component", + "InputCellCount", + "LibraryPrepMethod", + "NucleicAcidSource", + "PercentCellViability", + "Platform", + "SpecimenModality", + "TotalReads" + ], + "title": "SnRNASeqAssayMetadataTemplate", + "type": "object" } \ No newline at end of file diff --git 
a/model_json_schema/ark.SnRNASeqProcessedDataAnnotationTemplate.schema.json b/model_json_schema/ark.SnRNASeqProcessedDataAnnotationTemplate.schema.json index 4a4e518e..26ddce53 100644 --- a/model_json_schema/ark.SnRNASeqProcessedDataAnnotationTemplate.schema.json +++ b/model_json_schema/ark.SnRNASeqProcessedDataAnnotationTemplate.schema.json @@ -1,387 +1,370 @@ { + "$id": "http://example.com/SnRNASeqProcessedDataAnnotationTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", - "properties": { - "resourceType": { - "type": "array", - "items": { - "enum": [ - "metadata", - "experimental data" - ] - }, - "maxItems": 2 - }, - "fileFormat": { - "enum": [ - "xls", - "fam", - "bed", - "bai", - "Rds", - "bim", - "tsv", - "h5", - "tgz", - "zip", - "xlsx", - "txt", - "mtx", - "bam", - "csv", - "h5ad" - ] - }, - "specimenModality": { - "enum": [ - "multispecimen", - "unknown", - "single specimen" - ] - }, - "assay": { - "type": "array", - "items": { - "enum": [ - "H&E", - "Olink Focus", - "NULISA", - "CosMX", - "Olink Target 48", - "Olink Target 96", - "CITESeq", - "snATACSeq", - "scRNASeq", - "SNP array", - "snRNASeq", - "RNASeq", - "GenePS SeqFISH", - "SomaScan", - "Xenium", - "Visium", - "Olink Explore HT", - "CyTOF", - "Olink Reveal", - "imaging mass spectrometry", - "serial IHC", - "flow cytometry", - "LC-MS/MS", - "feature barcode sequencing", - "imaging mass cytometry", - "kiloplex", - "scVDJSeq", - "multiplexed ELISA", - "CE-MS", - "WES", - "VDJSeq", - "WGS", - "Olink Flex", - "ASAPSeq" - ] - }, - "maxItems": 34 - }, - "cellRangerOutput": { - "enum": [ - "filtered_peak_bc_matrix", - "raw MEX", - "raw_peak_bc_matrix", - "filtered MEX", - "raw_feature_bc_matrix", - "Not Applicable", - "filtered_feature_bc_matrix" - ] - }, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "dataLevel": { - "enum": [ - "1", - "3", - "2", - "4", - "5" - ] - }, - "metadataType": { - 
"enum": [ - "single-cell metadata", - "" - ] - }, - "processedDataType": { - "type": "array", - "items": { - "enum": [ - "epigenomic peaks", - "barcode counts", - "gene counts", - "differential expression results", - "" - ] - }, - "maxItems": 4 - }, - "RObjectClass": { - "enum": [ - "ROCR prediction.object", - "sparse matrix", - "SummarizedExperiment", - "list", - "matrix", - "Seurat object", - "vector", - "data.frame", - "Symphony reference", - "" - ] - }, - "individualID": {}, - "biospecimenID": {}, - "targetPanelSize": {}, - "targetPanel": {}, - "targetPanelSynID": {} - }, - "required": [ - "resourceType", - "fileFormat", - "specimenModality", - "assay", - "cellRangerOutput", - "Component", - "dataLevel" - ], "allOf": [ { "if": { "properties": { - "resourceType": { + "Assay": { "enum": [ - "metadata" + "Featurebarcodesequencing" ] } - }, - "required": [ - "resourceType" - ] + } }, "then": { "properties": { - "metadataType": { - "enum": [ - "single-cell metadata" - ] + "TargetPanel": { + "not": { + "type": "null" + } } }, "required": [ - "metadataType" + "TargetPanel" ] } }, { "if": { "properties": { - "resourceType": { + "Assay": { "enum": [ - "experimental data" + "Featurebarcodesequencing" ] } - }, - "required": [ - "resourceType" - ] + } }, "then": { "properties": { - "processedDataType": { - "type": "array", - "items": { - "enum": [ - "epigenomic peaks", - "barcode counts", - "gene counts", - "differential expression results", - "" - ] - }, - "maxItems": 4 + "TargetPanelSize": { + "not": { + "type": "null" + } } }, "required": [ - "processedDataType" + "TargetPanelSize" ] } }, { "if": { "properties": { - "fileFormat": { + "Assay": { "enum": [ - "Rds" + "Featurebarcodesequencing" ] } - }, - "required": [ - "fileFormat" - ] + } }, "then": { "properties": { - "RObjectClass": { - "enum": [ - "ROCR prediction.object", - "sparse matrix", - "SummarizedExperiment", - "list", - "matrix", - "Seurat object", - "vector", - "data.frame", - "Symphony reference", - "" - ] 
+ "TargetPanelSynID": { + "not": { + "type": "null" + } } }, "required": [ - "RObjectClass" + "TargetPanelSynID" ] } }, { "if": { "properties": { - "specimenModality": { + "FileFormat": { "enum": [ - "single specimen" + "Rds" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "individualID": { + "RObjectClass": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "individualID" + "RObjectClass" ] } }, { "if": { "properties": { - "specimenModality": { + "ResourceType": { "enum": [ - "single specimen" + "Experimentaldata" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "biospecimenID": { + "ProcessedDataType": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "biospecimenID" + "ProcessedDataType" ] } }, { "if": { "properties": { - "assay": { + "ResourceType": { "enum": [ - "feature barcode sequencing" + "Metadata" ] } - }, - "required": [ - "assay" - ] + } }, "then": { "properties": { - "targetPanelSize": {} + "MetadataType": { + "not": { + "type": "null" + } + } }, "required": [ - "targetPanelSize" + "MetadataType" ] } }, { "if": { "properties": { - "assay": { + "SpecimenModality": { "enum": [ - "feature barcode sequencing" + "Singlespecimen" ] } - }, - "required": [ - "assay" - ] + } }, "then": { "properties": { - "targetPanel": { + "BiospecimenID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "targetPanel" + "BiospecimenID" ] } }, { "if": { "properties": { - "assay": { + "SpecimenModality": { "enum": [ - "feature barcode sequencing" + "Singlespecimen" ] } - }, - "required": [ - "assay" - ] + } }, "then": { "properties": { - "targetPanelSynID": { + "IndividualID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "targetPanelSynID" + "IndividualID" ] } } - ] + ], + "description": "A data contributor template outlining metadata to be collected as file annotations for snRNA-seq processed data files (i.e., 
anything not a fastq file).", + "properties": { + "Assay": { + "description": "The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.", + "enum": [ + "ASAPSeq", + "CE-MS", + "CITESeq", + "CosMX", + "CyTOF", + "GenePS SeqFISH", + "H&E", + "LC-MS/MS", + "NULISA", + "Olink Explore HT", + "Olink Flex", + "Olink Focus", + "Olink Reveal", + "Olink Target 48", + "Olink Target 96", + "RNASeq", + "SNP array", + "SomaScan", + "VDJSeq", + "Visium", + "WES", + "WGS", + "Xenium", + "feature barcode sequencing", + "flow cytometry", + "imaging mass cytometry", + "imaging mass spectrometry", + "kiloplex", + "multiplexed ELISA", + "scRNASeq", + "scVDJSeq", + "serial IHC", + "snATACSeq", + "snRNASeq" + ], + "title": "assay" + }, + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.", + "title": "biospecimenID", + "type": "string" + }, + "CellRangerOutput": { + "description": "10x Genomics Cell Ranger software output several different counts results and formats, some with different processing applied. This label distinguishes between these types and is particularly helpful when multiple files are uploaded with the sample name, e.g., barcodes.tsv.gz", + "enum": [ + "Not Applicable", + "filtered MEX", + "filtered_feature_bc_matrix", + "filtered_peak_bc_matrix", + "raw MEX", + "raw_feature_bc_matrix", + "raw_peak_bc_matrix" + ], + "title": "cellRangerOutput" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "DataLevel": { + "description": "Level of data processing applied to file. 
Levels refer to pre-defined standards of processing for the given assay.", + "enum": [ + "1", + "2", + "3", + "4", + "5" + ], + "title": "dataLevel" + }, + "FileFormat": { + "description": "Standard file format name or file extension", + "enum": [ + "Rds", + "bai", + "bam", + "bed", + "bim", + "csv", + "fam", + "h5", + "h5ad", + "mtx", + "tgz", + "tsv", + "txt", + "xls", + "xlsx", + "zip" + ], + "title": "fileFormat" + }, + "IndividualID": { + "description": "Unique identifier assigned to each study participant. For multi-specimen data files provide all IDs in a comma-separated list.", + "title": "individualID", + "type": "string" + }, + "MetadataType": { + "description": "A label further classifying the content of metadata resource.", + "enum": [ + "single-cell metadata" + ], + "title": "metadataType" + }, + "ProcessedDataType": { + "description": "A label used for file annotations to provide a brief description of the processed data file.", + "enum": [ + "barcode counts", + "differential expression results", + "epigenomic peaks", + "gene counts" + ], + "title": "processedDataType" + }, + "RObjectClass": { + "description": "Rds files store R objects, one per file. 
This label details the class of the R object saved to the Rds file or other similar file types.", + "enum": [ + "ROCR prediction.object", + "Seurat object", + "SummarizedExperiment", + "Symphony reference", + "data.frame", + "list", + "matrix", + "sparse matrix", + "vector" + ], + "title": "RObjectClass" + }, + "ResourceType": { + "description": "High-level classification of the file content", + "enum": [ + "experimental data", + "metadata" + ], + "title": "resourceType" + }, + "SpecimenModality": { + "description": "Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens", + "enum": [ + "multispecimen", + "single specimen", + "unknown" + ], + "title": "specimenModality" + }, + "TargetPanel": { + "description": "A unique or established human-readable name assigned to the panel of targets profiled in the experiment. For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.", + "title": "targetPanel", + "type": "string" + }, + "TargetPanelSize": { + "description": "The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).", + "title": "targetPanelSize", + "type": "integer" + }, + "TargetPanelSynID": { + "description": "In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. 
This attribute links experimental data files to the target panel metadata via the synapse ID of that file.", + "pattern": "^syn[0-9]{8}", + "title": "targetPanelSynID", + "type": "string" + } + }, + "required": [ + "Assay", + "CellRangerOutput", + "Component", + "DataLevel", + "FileFormat", + "ResourceType", + "SpecimenModality" + ], + "title": "SnRNASeqProcessedDataAnnotationTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.SpatialImagingAssayMetadataTemplate.schema.json b/model_json_schema/ark.SpatialImagingAssayMetadataTemplate.schema.json index ad314461..78664b9e 100644 --- a/model_json_schema/ark.SpatialImagingAssayMetadataTemplate.schema.json +++ b/model_json_schema/ark.SpatialImagingAssayMetadataTemplate.schema.json @@ -1,112 +1,127 @@ { + "$id": "http://example.com/SpatialImagingAssayMetadataTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", - "properties": { - "targetPanelSize": {}, - "assay": { - "enum": [ - "H&E", - "CosMX", - "serial IHC", - "Xenium", - "GenePS SeqFISH", - "imaging mass cytometry", - "Visium" - ] - }, - "specimenModality": { - "enum": [ - "multispecimen", - "unknown", - "single specimen" - ] - }, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "targetPanelSynID": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "sampleProcessingBatch": {}, - "targetPanel": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "dataCollectionBatch": {}, - "slideID": {}, - "biospecimenID": {} - }, - "required": [ - "assay", - "specimenModality", - "Component", - "targetPanelSynID", - "targetPanel" - ], "allOf": [ { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "multispecimen" + "Multispecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "slideID": { + "SlideID": { "not": { "type": "null" - }, - "minLength": 
1 + } } }, "required": [ - "slideID" + "SlideID" ] } }, { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "single specimen" + "Singlespecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "biospecimenID": { + "BiospecimenID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "biospecimenID" + "BiospecimenID" ] } } - ] + ], + "description": "A template outlining metadata to be collected for each slide in a spatial transcriptomic or imaging based dataset.", + "properties": { + "Assay": { + "description": "The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.", + "enum": [ + "CosMX", + "GenePS SeqFISH", + "H&E", + "Visium", + "Xenium", + "imaging mass cytometry", + "serial IHC" + ], + "title": "assay" + }, + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. 
For multi-specimen data files provide all IDs in a comma-separated list.", + "title": "biospecimenID", + "type": "string" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "DataCollectionBatch": { + "description": "A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.", + "title": "dataCollectionBatch", + "type": "string" + }, + "SampleProcessingBatch": { + "description": "A label indicating batching of sample processing or preparation that occurs prior to data collection.", + "title": "sampleProcessingBatch", + "type": "string" + }, + "SlideID": { + "description": "A distinct label or name, unique within an experiment, assigned to an imaging slides.", + "title": "slideID", + "type": "string" + }, + "SpecimenModality": { + "description": "Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens", + "enum": [ + "multispecimen", + "single specimen", + "unknown" + ], + "title": "specimenModality" + }, + "TargetPanel": { + "description": "A unique or established human-readable name assigned to the panel of targets profiled in the experiment. For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.", + "title": "targetPanel", + "type": "string" + }, + "TargetPanelSize": { + "description": "The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. 
The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).", + "title": "targetPanelSize", + "type": "integer" + }, + "TargetPanelSynID": { + "description": "In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. This attribute links experimental data files to the target panel metadata via the synapse ID of that file.", + "pattern": "^syn[0-9]{8}", + "title": "targetPanelSynID", + "type": "string" + } + }, + "required": [ + "Assay", + "Component", + "SpecimenModality", + "TargetPanel", + "TargetPanelSynID" + ], + "title": "SpatialImagingAssayMetadataTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.SpatialImagingFileAnnotationTemplate.schema.json b/model_json_schema/ark.SpatialImagingFileAnnotationTemplate.schema.json index 3046bbe4..efc2c02e 100644 --- a/model_json_schema/ark.SpatialImagingFileAnnotationTemplate.schema.json +++ b/model_json_schema/ark.SpatialImagingFileAnnotationTemplate.schema.json @@ -1,155 +1,23 @@ { + "$id": "http://example.com/SpatialImagingFileAnnotationTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", - "properties": { - "fileFormat": { - "enum": [ - "fcs", - "h5ad", - "h5", - "txt", - "bim", - "geojson", - "tbi", - "vcf", - "tsv", - "mcd", - "erate", - "bai", - "py", - "bed", - "dose", - "czi", - "fastq", - "rds", - "fam", - "xls", - "tgz", - "zip", - "svs", - "parquet", - "xlsx", - "bam", - "pdf", - "rec", - "csv", - "info", - "mtx", - "docx" - ] - }, - "assay": { - "enum": [ - "Visium", - "CosMX", - "GenePS SeqFISH", - "Xenium", - "imaging mass cytometry", - "H&E", - "serial IHC" - ] - }, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "resourceType": { - "type": "array", - "items": { - "enum": [ - 
"metadata", - "experimental data" - ] - }, - "maxItems": 2 - }, - "specimenModality": { - "enum": [ - "multispecimen", - "unknown", - "single specimen" - ] - }, - "RObjectClass": { - "enum": [ - "data.frame", - "sparse matrix", - "Seurat object", - "vector", - "Symphony reference", - "ROCR prediction.object", - "matrix", - "list", - "SummarizedExperiment", - "" - ] - }, - "metadataType": { - "enum": [ - "target panel", - "cell coordinates", - "tissue microarray map", - "single-cell metadata", - "other", - "" - ] - }, - "processedDataType": { - "type": "array", - "items": { - "enum": [ - "differential expression results", - "epigenomic peaks", - "gene counts", - "barcode counts", - "" - ] - }, - "maxItems": 4 - }, - "slideID": {}, - "biospecimenID": {} - }, - "required": [ - "fileFormat", - "assay", - "Component", - "resourceType", - "specimenModality" - ], "allOf": [ { "if": { "properties": { - "fileFormat": { + "FileFormat": { "enum": [ - "rds" + "Rds" ] } - }, - "required": [ - "fileFormat" - ] + } }, "then": { "properties": { "RObjectClass": { - "enum": [ - "data.frame", - "sparse matrix", - "Seurat object", - "vector", - "Symphony reference", - "ROCR prediction.object", - "matrix", - "list", - "SummarizedExperiment", - "" - ] + "not": { + "type": "null" + } } }, "required": [ @@ -160,120 +28,225 @@ { "if": { "properties": { - "resourceType": { + "ResourceType": { "enum": [ - "metadata" + "Experimentaldata" ] } - }, - "required": [ - "resourceType" - ] + } }, "then": { "properties": { - "metadataType": { - "enum": [ - "target panel", - "cell coordinates", - "tissue microarray map", - "single-cell metadata", - "other" - ] + "ProcessedDataType": { + "not": { + "type": "null" + } } }, "required": [ - "metadataType" + "ProcessedDataType" ] } }, { "if": { "properties": { - "resourceType": { + "ResourceType": { "enum": [ - "experimental data" + "Metadata" ] } - }, - "required": [ - "resourceType" - ] + } }, "then": { "properties": { - "processedDataType": { - "type": 
"array", - "items": { - "enum": [ - "differential expression results", - "epigenomic peaks", - "gene counts", - "barcode counts", - "" - ] - }, - "maxItems": 4 + "MetadataType": { + "not": { + "type": "null" + } } }, "required": [ - "processedDataType" + "MetadataType" ] } }, { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "multispecimen" + "Multispecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "slideID": { + "SlideID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "slideID" + "SlideID" ] } }, { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "single specimen" + "Singlespecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "biospecimenID": { + "BiospecimenID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "biospecimenID" + "BiospecimenID" ] } } - ] + ], + "description": "A data contributor template outlining metadata to be collected as file annotations for imaging-based data files.", + "properties": { + "Assay": { + "description": "The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.", + "enum": [ + "CosMX", + "GenePS SeqFISH", + "H&E", + "Visium", + "Xenium", + "imaging mass cytometry", + "serial IHC" + ], + "title": "assay" + }, + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. 
For multi-specimen data files provide all IDs in a comma-separated list.", + "title": "biospecimenID", + "type": "string" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "FileFormat": { + "description": "Standard file format name or file extension", + "enum": [ + "bai", + "bam", + "bed", + "bim", + "csv", + "czi", + "docx", + "dose", + "erate", + "fam", + "fastq", + "fcs", + "geojson", + "h5", + "h5ad", + "info", + "mcd", + "mtx", + "parquet", + "pdf", + "py", + "rds", + "rec", + "svs", + "tbi", + "tgz", + "tsv", + "txt", + "vcf", + "xls", + "xlsx", + "zip" + ], + "title": "fileFormat" + }, + "MetadataType": { + "description": "A label further classifying the content of metadata resource.", + "enum": [ + "cell coordinates", + "other", + "single-cell metadata", + "target panel", + "tissue microarray map" + ], + "title": "metadataType" + }, + "ProcessedDataType": { + "description": "A label used for file annotations to provide a brief description of the processed data file.", + "enum": [ + "barcode counts", + "differential expression results", + "epigenomic peaks", + "gene counts" + ], + "title": "processedDataType" + }, + "RObjectClass": { + "description": "Rds files store R objects, one per file. 
This label details the class of the R object saved to the Rds file or other similar file types.", + "enum": [ + "ROCR prediction.object", + "Seurat object", + "SummarizedExperiment", + "Symphony reference", + "data.frame", + "list", + "matrix", + "sparse matrix", + "vector" + ], + "title": "RObjectClass" + }, + "ResourceType": { + "description": "High-level classification of the file content", + "enum": [ + "experimental data", + "metadata" + ], + "title": "resourceType" + }, + "SlideID": { + "description": "A distinct label or name, unique within an experiment, assigned to an imaging slides.", + "title": "slideID", + "type": "string" + }, + "SpecimenModality": { + "description": "Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens", + "enum": [ + "multispecimen", + "single specimen", + "unknown" + ], + "title": "specimenModality" + } + }, + "required": [ + "Assay", + "Component", + "FileFormat", + "ResourceType", + "SpecimenModality" + ], + "title": "SpatialImagingFileAnnotationTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_templates/ark.BDMFCSFileAnnotations.csv b/model_templates/ark.BDMFCSFileAnnotations.csv index 6969d8f6..0b320324 100644 --- a/model_templates/ark.BDMFCSFileAnnotations.csv +++ b/model_templates/ark.BDMFCSFileAnnotations.csv @@ -1 +1 @@ -Component,primaryCellSource,parentBiospecimenID,eventCount,diagnosis,sampleProcessingBatch,biospecimenSubtype,cellType,project,assay,species,biospecimenType,platform,specimenModality,dataSubtype,dataCollectionBatch,resourceType,fileFormat,dataType,userDefinedCellType,program,individualID,biospecimenID,visitID 
+Assay,BiospecimenID,BiospecimenSubtype,BiospecimenType,CellType,Component,DataCollectionBatch,DataSubtype,DataType,Diagnosis,EventCount,FileFormat,IndividualID,ParentBiospecimenID,Platform,PrimaryCellSource,Program,Project,ResourceType,SampleProcessingBatch,Species,SpecimenModality,UserDefinedCellType,VisitID diff --git a/model_templates/ark.BDMFastqFileAnnotations.csv b/model_templates/ark.BDMFastqFileAnnotations.csv index 14c9c417..adc6f021 100644 --- a/model_templates/ark.BDMFastqFileAnnotations.csv +++ b/model_templates/ark.BDMFastqFileAnnotations.csv @@ -1 +1 @@ -dataType,dataSubtype,diagnosis,nucleicAcidSource,readLength,species,resourceType,parentBiospecimenID,cellType,primaryCellSource,libraryPrepMethod,userDefinedCellType,biospecimenType,program,fileFormat,specimenModality,Component,platform,assay,project,biospecimenSubtype,visitID,libraryID,individualID,biospecimenID,targetPanelSize,targetPanelSynID,targetPanel +Assay,BiospecimenID,BiospecimenSubtype,BiospecimenType,CellType,Component,DataSubtype,DataType,Diagnosis,FileFormat,IndividualID,LibraryID,LibraryPrepMethod,NucleicAcidSource,ParentBiospecimenID,Platform,PrimaryCellSource,Program,Project,ReadLength,ResourceType,Species,SpecimenModality,TargetPanel,TargetPanelSize,TargetPanelSynID,UserDefinedCellType,VisitID diff --git a/model_templates/ark.BDMMetadataFileAnnotations.csv b/model_templates/ark.BDMMetadataFileAnnotations.csv index 579cc0e6..8b56bf86 100644 --- a/model_templates/ark.BDMMetadataFileAnnotations.csv +++ b/model_templates/ark.BDMMetadataFileAnnotations.csv @@ -1 +1 @@ -program,dataType,species,project,fileFormat,primaryCellSource,userDefinedCellType,resourceType,cellType,Component,programPhase,metadataType,metadataStandards,assay +Assay,CellType,Component,DataType,FileFormat,MetadataStandards,MetadataType,PrimaryCellSource,Program,ProgramPhase,Project,ResourceType,Species,UserDefinedCellType diff --git a/model_templates/ark.BDMOlinkFileAnnotations.csv 
b/model_templates/ark.BDMOlinkFileAnnotations.csv index 0f13dde5..8863aeda 100644 --- a/model_templates/ark.BDMOlinkFileAnnotations.csv +++ b/model_templates/ark.BDMOlinkFileAnnotations.csv @@ -1 +1 @@ -cellType,dataSubtype,dataType,plateID,visitID,platform,diagnosis,targetPanel,species,targetPanelSize,resourceType,targetPanelSynID,fileFormat,specimenModality,biospecimenType,program,project,userDefinedCellType,primaryCellSource,Component,programPhase +BiospecimenType,CellType,Component,DataSubtype,DataType,Diagnosis,FileFormat,PlateID,Platform,PrimaryCellSource,Program,ProgramPhase,Project,ResourceType,Species,SpecimenModality,TargetPanel,TargetPanelSize,TargetPanelSynID,UserDefinedCellType,VisitID diff --git a/model_templates/ark.BDMSpatialImagingFileAnnotations.csv b/model_templates/ark.BDMSpatialImagingFileAnnotations.csv index 7d5b65f9..2e663a7e 100644 --- a/model_templates/ark.BDMSpatialImagingFileAnnotations.csv +++ b/model_templates/ark.BDMSpatialImagingFileAnnotations.csv @@ -1 +1 @@ -parentBiospecimenID,Component,dataType,biospecimenSubtype,resourceType,program,dataSubtype,specimenModality,targetPanel,project,userDefinedCellType,targetPanelSynID,biospecimenType,primaryCellSource,fileFormat,cellType,visitID,targetPanelSize,assay,diagnosis,species,processedDataType,metadataType,slideID,biospecimenID,RObjectClass +Assay,BiospecimenID,BiospecimenSubtype,BiospecimenType,CellType,Component,DataSubtype,DataType,Diagnosis,FileFormat,MetadataType,ParentBiospecimenID,PrimaryCellSource,ProcessedDataType,Program,Project,RObjectClass,ResourceType,SlideID,Species,SpecimenModality,TargetPanel,TargetPanelSize,TargetPanelSynID,UserDefinedCellType,VisitID diff --git a/model_templates/ark.BDMscRNASeqProcessedDataAnnotations.csv b/model_templates/ark.BDMscRNASeqProcessedDataAnnotations.csv index 1909341a..df74f459 100644 --- a/model_templates/ark.BDMscRNASeqProcessedDataAnnotations.csv +++ b/model_templates/ark.BDMscRNASeqProcessedDataAnnotations.csv @@ -1 +1 @@ 
-assay,fileFormat,project,biospecimenType,diagnosis,primaryCellSource,dataLevel,visitID,cellRangerOutput,libraryPrepMethod,nucleicAcidSource,program,userDefinedCellType,biospecimenSubtype,species,cellType,specimenModality,softwareAndVersion,resourceType,dataType,dataSubtype,Component,parentBiospecimenID,alignmentReference,targetPanelSize,targetPanelSynID,targetPanel,RObjectClass,individualID,biospecimenID,metadataType,processedDataType +AlignmentReference,Assay,BiospecimenID,BiospecimenSubtype,BiospecimenType,CellRangerOutput,CellType,Component,DataLevel,DataSubtype,DataType,Diagnosis,FileFormat,IndividualID,LibraryPrepMethod,MetadataType,NucleicAcidSource,ParentBiospecimenID,PrimaryCellSource,ProcessedDataType,Program,Project,RObjectClass,ResourceType,SoftwareAndVersion,Species,SpecimenModality,TargetPanel,TargetPanelSize,TargetPanelSynID,UserDefinedCellType,VisitID diff --git a/model_templates/ark.BDMscVDJSeqProcessedDataAnnotations.csv b/model_templates/ark.BDMscVDJSeqProcessedDataAnnotations.csv index 11f71f3a..80e759fc 100644 --- a/model_templates/ark.BDMscVDJSeqProcessedDataAnnotations.csv +++ b/model_templates/ark.BDMscVDJSeqProcessedDataAnnotations.csv @@ -1 +1 @@ -cellType,softwareAndVersion,dataLevel,species,fileFormat,program,visitID,primaryCellSource,specimenModality,dataType,alignmentReference,biospecimenSubtype,parentBiospecimenID,diagnosis,Component,libraryPrepMethod,resourceType,nucleicAcidSource,userDefinedCellType,dataSubtype,project,biospecimenType,assay,RObjectClass,individualID,biospecimenID,metadataType,processedDataType,targetPanel,targetPanelSize,targetPanelSynID 
+AlignmentReference,Assay,BiospecimenID,BiospecimenSubtype,BiospecimenType,CellType,Component,DataLevel,DataSubtype,DataType,Diagnosis,FileFormat,IndividualID,LibraryPrepMethod,MetadataType,NucleicAcidSource,ParentBiospecimenID,PrimaryCellSource,ProcessedDataType,Program,Project,RObjectClass,ResourceType,SoftwareAndVersion,Species,SpecimenModality,TargetPanel,TargetPanelSize,TargetPanelSynID,UserDefinedCellType,VisitID diff --git a/model_templates/ark.BDMsnATACSeqProcessedDataAnnotations.csv b/model_templates/ark.BDMsnATACSeqProcessedDataAnnotations.csv index 7fc8797f..df74f459 100644 --- a/model_templates/ark.BDMsnATACSeqProcessedDataAnnotations.csv +++ b/model_templates/ark.BDMsnATACSeqProcessedDataAnnotations.csv @@ -1 +1 @@ -biospecimenType,dataLevel,project,dataType,assay,biospecimenSubtype,libraryPrepMethod,resourceType,visitID,diagnosis,parentBiospecimenID,cellRangerOutput,softwareAndVersion,primaryCellSource,userDefinedCellType,specimenModality,cellType,nucleicAcidSource,dataSubtype,alignmentReference,Component,program,species,fileFormat,targetPanelSynID,targetPanelSize,targetPanel,processedDataType,metadataType,biospecimenID,individualID,RObjectClass +AlignmentReference,Assay,BiospecimenID,BiospecimenSubtype,BiospecimenType,CellRangerOutput,CellType,Component,DataLevel,DataSubtype,DataType,Diagnosis,FileFormat,IndividualID,LibraryPrepMethod,MetadataType,NucleicAcidSource,ParentBiospecimenID,PrimaryCellSource,ProcessedDataType,Program,Project,RObjectClass,ResourceType,SoftwareAndVersion,Species,SpecimenModality,TargetPanel,TargetPanelSize,TargetPanelSynID,UserDefinedCellType,VisitID diff --git a/model_templates/ark.BDMsnRNASeqProcessedDataAnnotations.csv b/model_templates/ark.BDMsnRNASeqProcessedDataAnnotations.csv index d0da10da..df74f459 100644 --- a/model_templates/ark.BDMsnRNASeqProcessedDataAnnotations.csv +++ b/model_templates/ark.BDMsnRNASeqProcessedDataAnnotations.csv @@ -1 +1 @@ 
-biospecimenType,nucleicAcidSource,libraryPrepMethod,dataSubtype,dataLevel,cellType,cellRangerOutput,biospecimenSubtype,project,visitID,program,softwareAndVersion,specimenModality,primaryCellSource,Component,dataType,resourceType,species,diagnosis,assay,fileFormat,alignmentReference,userDefinedCellType,parentBiospecimenID,individualID,biospecimenID,metadataType,processedDataType,targetPanelSynID,targetPanelSize,targetPanel,RObjectClass +AlignmentReference,Assay,BiospecimenID,BiospecimenSubtype,BiospecimenType,CellRangerOutput,CellType,Component,DataLevel,DataSubtype,DataType,Diagnosis,FileFormat,IndividualID,LibraryPrepMethod,MetadataType,NucleicAcidSource,ParentBiospecimenID,PrimaryCellSource,ProcessedDataType,Program,Project,RObjectClass,ResourceType,SoftwareAndVersion,Species,SpecimenModality,TargetPanel,TargetPanelSize,TargetPanelSynID,UserDefinedCellType,VisitID diff --git a/model_templates/ark.BiospecimenMetadataTemplate.csv b/model_templates/ark.BiospecimenMetadataTemplate.csv index 008f3ec0..fdcdae24 100644 --- a/model_templates/ark.BiospecimenMetadataTemplate.csv +++ b/model_templates/ark.BiospecimenMetadataTemplate.csv @@ -1 +1 @@ -notes,program,project,biospecimenType,Component,biospecimenID,individualID,altSampleID,parentBiospecimenID,sampleCollectionBatch,biospecimenSubtype,visitID,skinSiteStatus,anatomicalSite,salivaCollectionProcedure,primaryCellSource,cellType,cellOntologyID,krennLining,krennInflammatory,synovialCollectionProcedure,krennStroma,krennSynovitisScore,userDefinedCellType,FACSPopulation +AltSampleID,AnatomicalSite,BiospecimenID,BiospecimenSubtype,BiospecimenType,CellOntologyID,CellType,Component,FACSPopulation,IndividualID,KrennInflammatory,KrennLining,KrennStroma,KrennSynovitisScore,Notes,ParentBiospecimenID,PrimaryCellSource,Program,Project,SalivaCollectionProcedure,SampleCollectionBatch,SkinSiteStatus,SynovialCollectionProcedure,UserDefinedCellType,VisitID diff --git a/model_templates/ark.BulkATAC-seqAssayMetadataTemplate.csv 
b/model_templates/ark.BulkATAC-seqAssayMetadataTemplate.csv index a1059f48..bd070a6f 100644 --- a/model_templates/ark.BulkATAC-seqAssayMetadataTemplate.csv +++ b/model_templates/ark.BulkATAC-seqAssayMetadataTemplate.csv @@ -1 +1 @@ -totalReads,specimenModality,softwareAndVersion,libraryPrepMethod,dataCollectionBatch,nucleicAcidSource,Component,sampleProcessingBatch,assay,alignmentReference,platform,biospecimenID,libraryID,10xProbeSetReference +10xProbeSetReference,AlignmentReference,Assay,BiospecimenID,Component,DataCollectionBatch,LibraryID,LibraryPrepMethod,NucleicAcidSource,Platform,SampleProcessingBatch,SoftwareAndVersion,SpecimenModality,TotalReads diff --git a/model_templates/ark.BulkRNA-seqAssayMetadataTemplate.csv b/model_templates/ark.BulkRNA-seqAssayMetadataTemplate.csv index ae2aa148..bd070a6f 100644 --- a/model_templates/ark.BulkRNA-seqAssayMetadataTemplate.csv +++ b/model_templates/ark.BulkRNA-seqAssayMetadataTemplate.csv @@ -1 +1 @@ -dataCollectionBatch,sampleProcessingBatch,specimenModality,Component,libraryPrepMethod,platform,softwareAndVersion,totalReads,alignmentReference,nucleicAcidSource,assay,biospecimenID,libraryID,10xProbeSetReference +10xProbeSetReference,AlignmentReference,Assay,BiospecimenID,Component,DataCollectionBatch,LibraryID,LibraryPrepMethod,NucleicAcidSource,Platform,SampleProcessingBatch,SoftwareAndVersion,SpecimenModality,TotalReads diff --git a/model_templates/ark.ClinicalMetadataTemplate.csv b/model_templates/ark.ClinicalMetadataTemplate.csv index f9c54595..e82d5d3e 100644 --- a/model_templates/ark.ClinicalMetadataTemplate.csv +++ b/model_templates/ark.ClinicalMetadataTemplate.csv @@ -1 +1 @@ -heightUnits,comorbidities,age,ethnicity,ageUnits,weight,program,Component,species,individualID,sex,race,weightUnits,project,height,diagnosis,PASI,diabetesType,visitID,CDASI,VASI,VETI,vitiligoPattern,VIDA 
+Age,AgeUnits,CDASI,Comorbidities,Component,DiabetesType,Diagnosis,Ethnicity,Height,HeightUnits,IndividualID,PASI,Program,Project,Race,Sex,Species,VASI,VETI,VIDA,VisitID,VitiligoPattern,Weight,WeightUnits diff --git a/model_templates/ark.CyTOFAssayMetadataTemplate.csv b/model_templates/ark.CyTOFAssayMetadataTemplate.csv index 9644aa59..44752f49 100644 --- a/model_templates/ark.CyTOFAssayMetadataTemplate.csv +++ b/model_templates/ark.CyTOFAssayMetadataTemplate.csv @@ -1 +1 @@ -softwareAndVersion,dataCollectionBatch,Component,targetPanelSynID,biospecimenID,sampleProcessingBatch,platform,targetPanelSize,assay,targetPanel +Assay,BiospecimenID,Component,DataCollectionBatch,Platform,SampleProcessingBatch,SoftwareAndVersion,TargetPanel,TargetPanelSize,TargetPanelSynID diff --git a/model_templates/ark.DatasetAnnotationTemplate.csv b/model_templates/ark.DatasetAnnotationTemplate.csv index fe7f3864..249e6215 100644 --- a/model_templates/ark.DatasetAnnotationTemplate.csv +++ b/model_templates/ark.DatasetAnnotationTemplate.csv @@ -1 +1 @@ -acknowledgmentStatement,diagnosis,species,associatedCodeURL,associatedDataset,dataSubtype,project,dataType,assay,ImmPortAccession,biospecimenType,publicationSynID,ARKRelease,datasetStatus,datasetDescription,associatedAccession,Component,datasetType,program,biospecimenSubtype,programPhase +ARKRelease,AcknowledgmentStatement,Assay,AssociatedAccession,AssociatedCodeURL,AssociatedDataset,BiospecimenSubtype,BiospecimenType,Component,DataSubtype,DataType,DatasetDescription,DatasetStatus,DatasetType,Diagnosis,ImmPortAccession,Program,ProgramPhase,Project,PublicationSynID,Species diff --git a/model_templates/ark.FCSFileAnnotationTemplate.csv b/model_templates/ark.FCSFileAnnotationTemplate.csv index 80afbcda..17d13c98 100644 --- a/model_templates/ark.FCSFileAnnotationTemplate.csv +++ b/model_templates/ark.FCSFileAnnotationTemplate.csv @@ -1 +1 @@ 
-fileFormat,eventCount,specimenModality,Component,assay,dataCollectionBatch,sampleProcessingBatch,individualID,biospecimenID +Assay,BiospecimenID,Component,DataCollectionBatch,EventCount,FileFormat,IndividualID,SampleProcessingBatch,SpecimenModality diff --git a/model_templates/ark.FastqFileAnnotationTemplate.csv b/model_templates/ark.FastqFileAnnotationTemplate.csv index 8e828c42..9623c883 100644 --- a/model_templates/ark.FastqFileAnnotationTemplate.csv +++ b/model_templates/ark.FastqFileAnnotationTemplate.csv @@ -1 +1 @@ -Component,fileFormat,specimenModality,readLength,assay,libraryID,biospecimenID,individualID,targetPanel,targetPanelSynID,targetPanelSize +Assay,BiospecimenID,Component,FileFormat,IndividualID,LibraryID,ReadLength,SpecimenModality,TargetPanel,TargetPanelSize,TargetPanelSynID diff --git a/model_templates/ark.InVitroBiospecimenMetadataTemplate.csv b/model_templates/ark.InVitroBiospecimenMetadataTemplate.csv index d56c4e8f..21d94f93 100644 --- a/model_templates/ark.InVitroBiospecimenMetadataTemplate.csv +++ b/model_templates/ark.InVitroBiospecimenMetadataTemplate.csv @@ -1 +1 @@ -program,biospecimenID,notes,altSampleID,biospecimenType,individualID,parentBiospecimenID,biospecimenSubtype,Component,project,treatmentTimepoint,sampleCollectionBatch,treatment,visitID,primaryCellSource,cellType,cellOntologyID,anatomicalSite,skinSiteStatus,synovialCollectionProcedure,salivaCollectionProcedure,krennLining,krennInflammatory,krennSynovitisScore,krennStroma,FACSPopulation,userDefinedCellType +AltSampleID,AnatomicalSite,BiospecimenID,BiospecimenSubtype,BiospecimenType,CellOntologyID,CellType,Component,FACSPopulation,IndividualID,KrennInflammatory,KrennLining,KrennStroma,KrennSynovitisScore,Notes,ParentBiospecimenID,PrimaryCellSource,Program,Project,SalivaCollectionProcedure,SampleCollectionBatch,SkinSiteStatus,SynovialCollectionProcedure,Treatment,TreatmentTimepoint,UserDefinedCellType,VisitID diff --git a/model_templates/ark.OlinkAssayMetadataTemplate.csv 
b/model_templates/ark.OlinkAssayMetadataTemplate.csv index 2731a7f9..5872704c 100644 --- a/model_templates/ark.OlinkAssayMetadataTemplate.csv +++ b/model_templates/ark.OlinkAssayMetadataTemplate.csv @@ -1 +1 @@ -targetPanelSynID,targetPanel,Component,targetPanelSize,platform,plateID,assay +Assay,Component,PlateID,Platform,TargetPanel,TargetPanelSize,TargetPanelSynID diff --git a/model_templates/ark.OlinkFileAnnotationTemplate.csv b/model_templates/ark.OlinkFileAnnotationTemplate.csv index 447c2338..ff8a582c 100644 --- a/model_templates/ark.OlinkFileAnnotationTemplate.csv +++ b/model_templates/ark.OlinkFileAnnotationTemplate.csv @@ -1 +1 @@ -fileFormat,specimenModality,plateID,resourceType,Component +Component,FileFormat,PlateID,ResourceType,SpecimenModality diff --git a/model_templates/ark.PublicationMetadataTemplate.csv b/model_templates/ark.PublicationMetadataTemplate.csv index e13c167a..9d580840 100644 --- a/model_templates/ark.PublicationMetadataTemplate.csv +++ b/model_templates/ark.PublicationMetadataTemplate.csv @@ -1 +1 @@ -PMID,associatedDataset,Component,publicationDate,publicationType,title,DOI,PMCID,year,program,journal,project,programPhase +AssociatedDataset,Component,DOI,Journal,PMCID,PMID,Program,ProgramPhase,Project,PublicationDate,PublicationType,Title,Year diff --git a/model_templates/ark.ScRNASeqAssayMetadataTemplate.csv b/model_templates/ark.ScRNASeqAssayMetadataTemplate.csv index 5d8417ed..7c5d5feb 100644 --- a/model_templates/ark.ScRNASeqAssayMetadataTemplate.csv +++ b/model_templates/ark.ScRNASeqAssayMetadataTemplate.csv @@ -1 +1 @@ -alignmentReference,sampleProcessingBatch,dataCollectionBatch,assay,inputCellCount,libraryPrepMethod,totalReads,sequencingSaturation,softwareAndVersion,specimenModality,Component,platform,nucleicAcidSource,percentCellViability,10xProbeSetReference,biospecimenID,libraryID 
+10xProbeSetReference,AlignmentReference,Assay,BiospecimenID,Component,DataCollectionBatch,InputCellCount,LibraryID,LibraryPrepMethod,NucleicAcidSource,PercentCellViability,Platform,SampleProcessingBatch,SequencingSaturation,SoftwareAndVersion,SpecimenModality,TotalReads diff --git a/model_templates/ark.ScRNASeqProcessedDataAnnotationTemplate.csv b/model_templates/ark.ScRNASeqProcessedDataAnnotationTemplate.csv index e017fab1..34b3e4ba 100644 --- a/model_templates/ark.ScRNASeqProcessedDataAnnotationTemplate.csv +++ b/model_templates/ark.ScRNASeqProcessedDataAnnotationTemplate.csv @@ -1 +1 @@ -resourceType,assay,fileFormat,dataLevel,Component,cellRangerOutput,specimenModality,metadataType,processedDataType,targetPanelSize,targetPanelSynID,targetPanel,RObjectClass,individualID,biospecimenID +Assay,BiospecimenID,CellRangerOutput,Component,DataLevel,FileFormat,IndividualID,MetadataType,ProcessedDataType,RObjectClass,ResourceType,SpecimenModality,TargetPanel,TargetPanelSize,TargetPanelSynID diff --git a/model_templates/ark.ScVDJSeqProcessedDataAnnotationTemplate.csv b/model_templates/ark.ScVDJSeqProcessedDataAnnotationTemplate.csv index 76dc5a9d..5c272687 100644 --- a/model_templates/ark.ScVDJSeqProcessedDataAnnotationTemplate.csv +++ b/model_templates/ark.ScVDJSeqProcessedDataAnnotationTemplate.csv @@ -1 +1 @@ -dataLevel,Component,specimenModality,fileFormat,assay,resourceType,biospecimenID,individualID,RObjectClass,targetPanelSize,targetPanelSynID,targetPanel,metadataType,processedDataType +Assay,BiospecimenID,Component,DataLevel,FileFormat,IndividualID,MetadataType,ProcessedDataType,RObjectClass,ResourceType,SpecimenModality,TargetPanel,TargetPanelSize,TargetPanelSynID diff --git a/model_templates/ark.SnATAC-seqAssayMetadataTemplate.csv b/model_templates/ark.SnATAC-seqAssayMetadataTemplate.csv index ff75dde3..7c5d5feb 100644 --- a/model_templates/ark.SnATAC-seqAssayMetadataTemplate.csv +++ b/model_templates/ark.SnATAC-seqAssayMetadataTemplate.csv @@ -1 +1 @@ 
-assay,nucleicAcidSource,specimenModality,dataCollectionBatch,softwareAndVersion,alignmentReference,totalReads,percentCellViability,platform,Component,sequencingSaturation,sampleProcessingBatch,inputCellCount,libraryPrepMethod,libraryID,biospecimenID,10xProbeSetReference +10xProbeSetReference,AlignmentReference,Assay,BiospecimenID,Component,DataCollectionBatch,InputCellCount,LibraryID,LibraryPrepMethod,NucleicAcidSource,PercentCellViability,Platform,SampleProcessingBatch,SequencingSaturation,SoftwareAndVersion,SpecimenModality,TotalReads diff --git a/model_templates/ark.SnATACSeqProcessedDataAnnotationTemplate.csv b/model_templates/ark.SnATACSeqProcessedDataAnnotationTemplate.csv index 157f4c03..34b3e4ba 100644 --- a/model_templates/ark.SnATACSeqProcessedDataAnnotationTemplate.csv +++ b/model_templates/ark.SnATACSeqProcessedDataAnnotationTemplate.csv @@ -1 +1 @@ -assay,Component,resourceType,specimenModality,fileFormat,dataLevel,cellRangerOutput,targetPanelSize,targetPanelSynID,targetPanel,metadataType,processedDataType,individualID,biospecimenID,RObjectClass +Assay,BiospecimenID,CellRangerOutput,Component,DataLevel,FileFormat,IndividualID,MetadataType,ProcessedDataType,RObjectClass,ResourceType,SpecimenModality,TargetPanel,TargetPanelSize,TargetPanelSynID diff --git a/model_templates/ark.SnRNASeqAssayMetadataTemplate.csv b/model_templates/ark.SnRNASeqAssayMetadataTemplate.csv index e529f24a..7c5d5feb 100644 --- a/model_templates/ark.SnRNASeqAssayMetadataTemplate.csv +++ b/model_templates/ark.SnRNASeqAssayMetadataTemplate.csv @@ -1 +1 @@ -dataCollectionBatch,percentCellViability,softwareAndVersion,alignmentReference,nucleicAcidSource,sampleProcessingBatch,Component,platform,assay,inputCellCount,sequencingSaturation,totalReads,libraryPrepMethod,specimenModality,10xProbeSetReference,libraryID,biospecimenID 
+10xProbeSetReference,AlignmentReference,Assay,BiospecimenID,Component,DataCollectionBatch,InputCellCount,LibraryID,LibraryPrepMethod,NucleicAcidSource,PercentCellViability,Platform,SampleProcessingBatch,SequencingSaturation,SoftwareAndVersion,SpecimenModality,TotalReads diff --git a/model_templates/ark.SnRNASeqProcessedDataAnnotationTemplate.csv b/model_templates/ark.SnRNASeqProcessedDataAnnotationTemplate.csv index fcc7caf1..34b3e4ba 100644 --- a/model_templates/ark.SnRNASeqProcessedDataAnnotationTemplate.csv +++ b/model_templates/ark.SnRNASeqProcessedDataAnnotationTemplate.csv @@ -1 +1 @@ -resourceType,fileFormat,specimenModality,assay,cellRangerOutput,Component,dataLevel,metadataType,processedDataType,RObjectClass,individualID,biospecimenID,targetPanelSize,targetPanel,targetPanelSynID +Assay,BiospecimenID,CellRangerOutput,Component,DataLevel,FileFormat,IndividualID,MetadataType,ProcessedDataType,RObjectClass,ResourceType,SpecimenModality,TargetPanel,TargetPanelSize,TargetPanelSynID diff --git a/model_templates/ark.SpatialImagingAssayMetadataTemplate.csv b/model_templates/ark.SpatialImagingAssayMetadataTemplate.csv index 1b5cc61c..6e886fb9 100644 --- a/model_templates/ark.SpatialImagingAssayMetadataTemplate.csv +++ b/model_templates/ark.SpatialImagingAssayMetadataTemplate.csv @@ -1 +1 @@ -targetPanelSize,assay,specimenModality,Component,targetPanelSynID,sampleProcessingBatch,targetPanel,dataCollectionBatch,slideID,biospecimenID +Assay,BiospecimenID,Component,DataCollectionBatch,SampleProcessingBatch,SlideID,SpecimenModality,TargetPanel,TargetPanelSize,TargetPanelSynID diff --git a/model_templates/ark.SpatialImagingFileAnnotationTemplate.csv b/model_templates/ark.SpatialImagingFileAnnotationTemplate.csv index d347c012..0ae7e4d6 100644 --- a/model_templates/ark.SpatialImagingFileAnnotationTemplate.csv +++ b/model_templates/ark.SpatialImagingFileAnnotationTemplate.csv @@ -1 +1 @@ 
-fileFormat,assay,Component,resourceType,specimenModality,RObjectClass,metadataType,processedDataType,slideID,biospecimenID +Assay,BiospecimenID,Component,FileFormat,MetadataType,ProcessedDataType,RObjectClass,ResourceType,SlideID,SpecimenModality diff --git a/utils/context_specific_models.py b/utils/context_specific_models.py index 73824855..68345702 100644 --- a/utils/context_specific_models.py +++ b/utils/context_specific_models.py @@ -5,6 +5,11 @@ import sys import time +''' +this script will take each context.csv and combine with the ark.all_attributes.csv +to generate context-specific model csv files. +''' + #### #### Functions #### diff --git a/utils/generate_csv_templates.py b/utils/generate_csv_templates.py index b26a45bd..c4a2b94d 100644 --- a/utils/generate_csv_templates.py +++ b/utils/generate_csv_templates.py @@ -3,6 +3,12 @@ import sys import json +''' +this script generates a "blank" csv file for every json schema in model_json_schema/ +which are used for various downstream purposes including the data dictionary site +and BDM curation work +''' + #### #### Functions #### diff --git a/utils/generate_jsonschema.py b/utils/generate_jsonschema.py new file mode 100644 index 00000000..2a6ea415 --- /dev/null +++ b/utils/generate_jsonschema.py @@ -0,0 +1,30 @@ +from synapseclient import Synapse +from synapseclient.extensions.curator import generate_jsonschema +import pandas as pd + +''' +use synapseclient extension to create Curator json schema from context models +''' + +# create synapse client obj, this will be unnecessary in future client releases +syn = Synapse() + +# read in compiled set of templates for each context +templates = pd.read_table("templates_by_context.txt", header=None) +templates.columns = ['template', 'context'] +templates = templates.groupby(['context']).agg({'template': lambda x: list(x)}).reset_index() +templates = templates.set_index('context').to_dict()['template'] + +for context in templates.keys(): + print(f"Generating JSON 
schemas for context: {context}") + for t in templates[context]: + schemas, file_paths = generate_jsonschema( + data_model_source=f"model_contexts/{context}/ark.{context}_model.csv", + output=f"model_json_schema/ark.{t}.schema.json", + data_types= [t], + synapse_client=syn + ) + +print("JSON schema generation complete!") + +# END