diff --git a/.github/workflows/ci-schema-convert.yml b/.github/workflows/ci-schema-convert.yml index 2a90e6d1..213bd1cb 100644 --- a/.github/workflows/ci-schema-convert.yml +++ b/.github/workflows/ci-schema-convert.yml @@ -11,10 +11,10 @@ on: - '.github/workflows/ci-schema-convert.yml' - 'utils/**' - 'model_contexts/**' + workflow_dispatch: env: - SCHEMATIC_VERSION: 24.11.2 - SCHEMATIC_SERVICE_ACCOUNT_CREDS: ${{ secrets.SCHEMATIC_SERVICE_ACCOUNT_CREDS }} + CLIENT_VERSION: 4.11.0 jobs: build: @@ -28,28 +28,20 @@ jobs: # Set up supported python. - uses: actions/setup-python@v5 with: - python-version: '3.10.12' + python-version: '3.10.19' - name: Install Python Packages run: | - pip install schematicpy==${{ env.SCHEMATIC_VERSION }} - pip show schematicpy - - - name: Create creds file - run: | - echo "${SCHEMATIC_SERVICE_ACCOUNT_CREDS}" > schematic_service_account_creds.json + pip install "synapseclient[pandas, curator]==${{ env.CLIENT_VERSION }}" + pip show synapseclient - name: Create context-specific models run: | python utils/context_specific_models.py - - name: convert model csv to jsonld - run: | - bash utils/schema_convert.sh - - - name: Generate templates + - name: Generate json schemas run: | - bash utils/generate_model_templates.sh + python utils/generate_jsonschema.py - name: Generate blank CSV templates from json schema to support dictionary site build run: | @@ -59,10 +51,6 @@ jobs: run: | rm model_json_schema/ark.BDM* - - name: Clean up sensitive info - run: | - rm schematic_service_account_creds.json - - name: Commit files if there are changes run: | git status diff --git a/.github/workflows/create-template-config.yml b/.github/workflows/create-template-config.yml deleted file mode 100644 index c9f1683b..00000000 --- a/.github/workflows/create-template-config.yml +++ /dev/null @@ -1,80 +0,0 @@ -# -------------------------------------------------------------------------------------------------- -# GitHub Action to create a DCA template config json file 
for a data model -# -# This action creates a json file named with the `file` input argument using -# the data model supplied to the `data_model` argument. It will validate the -# json file against DCA's template config schema. Finally, it will create a PR -# in the repo for the new file. -# -# Copy this into your data model repo .github/workflow directory. -# Your repo settings must have Actions enabled and must allow GitHub Actions to -# create and approve pull requests. -# -# By default, this action runs by workflow dispatch. But it can be configured -# to run on other triggers. Consult the github doc below for more information. -# https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#workflow_dispatch -# -# The resulting file with contain one entry for each attribute in the data model that `dependsOn` -# "Component". To include a subset of these attributes, use either `include_data_types` or -# `exclude_data_types` in the call to `datacurator::write_dca_template_config()` -# -# -------------------------------------------------------------------------------------------------- - -name: DCA Template Config File -on: - workflow_dispatch: - inputs: - data_model: - description: URL to a jsonld data model file - required: true - file: - description: Directory to save the template config - required: true - include_data_types: - description: Space-separated string of data types to include in output. Must be empty if using exclude_data_types. - required: false - exclude_data_types: - description: Space-separated string of data types to exclude from output. Must be empty if using include_data_types. - required: false - data_model_labels: - description: How schematic gets data model labels. Defaults `class_label`. 
- default: 'class_label' - required: true - -jobs: - create-template-config: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Checkout DCA Config Repo for Schema - uses: actions/checkout@v4 - with: - repository: 'Sage-Bionetworks/data_curator_config' - ref: 'dev' - path: 'data_curator_config' - - - name: Create config file - uses: sage-bionetworks/dca-template-config-action@main - env: - data_model: ${{ inputs.data_model }} - file: ${{ inputs.file }} - include_data_types: ${{ inputs.include_data_types }} - exclude_data_types: ${{ inputs.exclude_data_types }} - data_model_labels: ${{ inputs.data_model_labels }} - - - name: Validate Config File - uses: docker://orrosenblatt/validate-json-action:latest - env: - INPUT_SCHEMA: 'data_curator_config/schemas/dca_template_config.schema.json' - INPUT_JSONS: ${{ inputs.file }} - - - name: Open PR - uses: peter-evans/create-pull-request@v5 - with: - title: Update DCA Template Config File - body: Recreate the json file that populates the DCA template dropdown menu. - delete-branch: true - branch-suffix: timestamp - add-paths: | - ${{ inputs.file }} diff --git a/ark.all_attributes.csv b/ark.all_attributes.csv index 6c9c246e..0dba1c07 100644 --- a/ark.all_attributes.csv +++ b/ark.all_attributes.csv @@ -1,110 +1,110 @@ -"Attribute","Description","Valid Values","DependsOn","Properties","Required","Parent","DependsOn Component","Source","Validation Rules","columnType" -"Component","A high-level attribute for grouping attributes into templates.","","","","True","","","","","string" -"fileFormat","Standard file format name or file extension","bai, bam, bed, bim, csv, czi, docx, dose, erate, fam, fastq, fcs, geojson, h5, h5ad, info, mcd, mtx, parquet, pdf, py, rds, rec, svs, tbi, tgz, tsv, txt, vcf, xls, xlsx, zip","","","True","","","","","string" -"publicationSynID","The synID of the corresponding Synapse entity that stores metadata about the publication. 
This is used to differentiate publication-specific files, often consisting of level 4 processed data and expanded subject metadata, in a publication dataset that also includes raw or minimally processed files from experimental datasets. This provides an easy way to distinguish and select for the publication-specific data from which the research findings were derived. When this attribute is used to annotate a Dataset it serves as a way to directly link the Dataset entity with the publication metadata stored in Synapse.","","","","False","","","","regex search ^syn[0-9]{8} error","string" -"associatedDataset","The synID of a Dataset entity. This serves to link other Synapse entities to Dataset entities. When used to annotate a publication Dataset this attribute should include the synID for an experimental Datasets from which the publication data was derived. Multiple synID can be specified using a comma-delimited list.","","","","False","","","","list like::regex search ^syn[0-9]{8} error","string" -"program","Name of the funding program that supported the generation of data and associated files","AMP AIM, AMP RA/SLE, Community Contribution","","","True","","","","","string" -"project","A sub-level attribute of `program` specifying a research initiative working to investigate particular hypotheses.","AIM for RA, ELLIPSS, LOCKIT, RA, SLE, STAMP, UMass V-CoRT","","","True","","","","list like error","string" -"programPhase","A label noting which AMP RA/SLE program phase generated the data.","I, II","","","True","","","","list like error","string" -"resourceType","High-level classification of the file content","code, experimental data, figure, metadata","","","True","","","","","string" -"dataType","High-level classification of the type of data contained in the file, loosely related to the experimental method or biological entity that is being profiled. Select all that apply using a comma-delimited list, though in most cases only a single label is expected. 
For multimodal datasets with concomitant profiling of biospecimen include 'multimodal'.","cytometry, epigenomics, genomics, histology, immune repertoire profiling, immunostaining, lipidomics, metabolomics, microbiome, multimodal, proteomics, transcriptomics","","","True","","","","list like error","string" -"dataSubtype","General classification to differentiate between omics profiling modalities. If N/A please select 'none'. Multiple selections can be provided in a comma-delimited list, however this is largely only expected in the context of Datasets and files that contain integrated experimental data spanning multiple types.","bulk, none, pseudobulk, single-cell, single-nucleus, spatial","","","True","","","","list like error","string" -"dataLevel","Level of data processing applied to file. Levels refer to pre-defined standards of processing for the given assay.","1, 2, 3, 4, 5","","","True","","","","","string" -"assay","The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.","ASAPSeq, CE-MS, CITESeq, CosMX, CyTOF, GenePS SeqFISH, H&E, LC-MS/MS, NULISA, Olink Explore HT, Olink Flex, Olink Focus, Olink Reveal, Olink Target 48, Olink Target 96, RNASeq, SNP array, SomaScan, VDJSeq, Visium, WES, WGS, Xenium, feature barcode sequencing, flow cytometry, imaging mass cytometry, imaging mass spectrometry, kiloplex, multiplexed ELISA, scRNASeq, scVDJSeq, serial IHC, snATACSeq, snRNASeq","","","True","","","","list like error","string" -"10xProbeSetReference","Name of probe set used in 10x Chromium Flex, Xenium, or Visium data. 
If custom modified probe set was used the probe reference should be included as metadata accompanying the experimental data files.","Flex Human Transcriptome Probe Set v1.0.1, Flex Human Transcriptome Probe Set v1.1.0, Visium Human Transcriptome v1, Visium Human Transcriptome v2, custom probe set","","","True","","","","","string" -"custom10xProbeSetSynID","If custom modified probe sets were used for collecting 10x Chromium Flex scRNA-seq data, then the probe reference files should be grouped as a zip or tar archive and uploaded as metadata. This attribute links the experimental data back to the probe set archive file via a synapse ID.","","","","True","","","","regex search ^syn[0-9]{8} error","string" -"platform","The specific version (manufacturer, model, etc.) of a technology that is used to carry out a laboratory or computational experiment. Specify where applicable for experimental data files, else enter 'none'. In most cases only a single label is expected, however multiple selections can be provided in comma-delimited list where applicable e.g., for 10x Genomics fastq files please specify both the 10x instrument and the sequencing platform.","BD FACSAria Fusion cell sorter, BD FACSAria III, BD FACSCanto, BD FACSCanto II, BD FACSDiscover A8, BD FACSDiscover S8, BD FACSLyric Clinical, BD FACSMelody, BD FACSymphony S6, BD LSRFortessa, Chromium Controller, Chromium GEM-X Single Cell 3' Chip v4, Chromium Next GEM Chip G, Chromium Next GEM Chip H, Chromium Next GEM Chip K, Chromium Next GEM Chip M, Chromium Next GEM Chip Q, Chromium X, Chromium Xo, Chromium iX, CyTOF XT, Cytek Aurora, Cytek Aurora Evo, Fluidigm BioMark, GEM-X Flex Gene Expression Chip, GEM-X OCM 5' Chip, Helios Mass Cytometer, Hyperion, Illumina HiSeq 2500, Illumina HiSeq X Ten, Illumina NextSeq 500, Illumina NovaSeq 6000, Illumina NovaSeq X, Not Applicable, Olink Signature Q100, Sony MA900, Thermo Fisher Attune CytPix, Thermo Fisher Attune NxT, Thermo Fisher Attune Xenith, Visium CytAssist, 
Xenium, none, unknown","","","True","","","","list like error","string" -"specimenModality","Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens","multispecimen, single specimen, unknown","","","True","","","","","string" -"individualID","Unique identifier assigned to each study participant. For multi-specimen data files provide all IDs in a comma-separated list.","","","","True","","","","#ClinicalMetadataTemplate unique error^^#BiospecimenMetadataTemplate str^^list like error","string" -"biospecimenID","A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.","","","","True","","","","#BiospecimenMetadataTemplate unique error^^list like error","string" -"parentBiospecimenID","The biospecimenID associated with the originating biospecimen for derived or child biospecimens.","","","","False","","","","","string" -"biospecimenType","A label indicating the biological material collected for experimentation and data collection. 
Where applicable, provide all types in a comma-separated list.","PBMCs, cell line, fibroblast-like synoviocyte, kidney biopsy, none, plasma, primary cell culture, saliva, salivary gland, serum, skin biopsy, skin swab, stool, suction blister cells, suction blister fluid, synovial fluid, synovial tissue, total leukocytes, urine, uvea, whole blood","","","True","","","","#BiospecimenMetadataTemplate str^^#InVitroBiospecimenMetadataTemplate^^list like error","string" -"primaryCellSource","A label indicating the biological source material from which a primary cell culture was derived.","PBMCs, kidney, pannus-derived dermis, pannus-derived epidermis, salivary gland, synovial tissue, total leukocytes, urine, uvea, whole blood","","","True","","","","","string" -"metadataType","A label further classifying the content of metadata resource.","assay, biospecimen, cell coordinates, data dictionary, file manifest, medication, other, phenotype, protocol, single-cell metadata, target panel, template, tissue microarray map, tissue multiarray map, user manual","","","True","","","","","string" -"codingLanguage","The coding, aka programming, language(s) contained in the file marked with `resourceType = code`. 
Select all that apply.","C, C#, C++, Fortan, Java, Julia, Matlab, Python, R, Ruby, SAS","","","True","","","","","string" -"visitID","Ordinal ID distinguishing different patient visits.","","","","True","","","","","string" -"softwareAndVersion","Relevant software and version used to generate the data file.","BD FACSDiva 8.0.1, Cell Ranger 9.0.1, Cell Ranger ATAC v1.1.0, Cell Ranger v3.0.0, Cell Ranger v3.0.1, Cell Ranger v3.0.2, Cell Ranger v3.1.0, Cell Ranger v4.0.0, Cell Ranger v5.0.0, Cell Ranger v5.0.1, Cell Ranger v6.0.0, Cell Ranger v6.0.1, Cell Ranger v6.0.2, Cell Ranger v6.1.0, Cell Ranger v6.1.1, Cell Ranger v6.1.2, Cell Ranger v7.0.0, Cell Ranger v7.0.1, Cell Ranger v7.1.0, Cell Ranger v7.2.0, Cell Ranger v8.0.0, Cell Ranger v8.0.1, Cell Ranger v9.0.0, Space Ranger 3.0.0, Space Ranger 3.0.1, Space Ranger 3.1.0, Space Ranger 3.1.1, Space Ranger 3.1.2, Space Ranger 3.1.3, demuxlet","","","False","","","","","string" -"targetPanel","A unique or established human-readable name assigned to the panel of targets profiled in the experiment. For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.","","","","True","","","","","string" -"targetPanelSynID","In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. This attribute links experimental data files to the target panel metadata via the synapse ID of that file.","","","","True","","","","regex search ^syn[0-9]{8} error","string" -"targetPanelSize","The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. 
The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).","","","","False","","","","int error","integer" -"nucleicAcidSource","The source of the nucleic acid used as input for sequencing library fragments. Select all that apply, though in most cases only a single label is expected.","BCR mRNA, CRISPR protospacer feature barcode, TCR mRNA, Tn5-accessible gDNA, antigen capture barcode, gDNA, globin-depleted RNA, intracellular protein feature barcode, multiplexing oligo, poly(A) RNA, rRNA-depleted RNA, surface protein feature barcode","","","True","","","","list like error","string" -"biospecimenSubtype","Biospecimen status before sample is processed into a scRNA-seq library. Several scRNA-seq technologies support a variety of sample processing methods which can introduces sources of technical variation.","FFPE tissue, PFA-fixed tissue, cell or tissue lysate, cell suspension, flow-sorted cells, fresh tissue, frozen tissue, nuclei suspension, supernatant","","","False","","","","","string" -"cellRangerOutput","10x Genomics Cell Ranger software output several different counts results and formats, some with different processing applied. This label distinguishes between these types and is particularly helpful when multiple files are uploaded with the sample name, e.g., barcodes.tsv.gz","Not Applicable, filtered MEX, filtered_feature_bc_matrix, filtered_peak_bc_matrix, raw MEX, raw_feature_bc_matrix, raw_peak_bc_matrix","","","True","","","","","string" -"alignmentReference","The genomic/transcriptomic reference used for performing read alignment against.","10x Cell Ranger Human GRCh38 2020-A, 10x Cell Ranger Human GRCh38 2024-A, GRCh38, modified GRCh38, unknown, vdj_GRCh38_alts_ensembl-4.0.0","","","True","","","","","string" -"libraryPrepMethod","Sequencing library preparation method or kit used to create the library. 
If no commercially available kit was used, please select 'in-house library prep'.","10x Chromium Fixed RNA Human Transcriptome, 10x Chromium GEM-X Single Cell 3' v4, 10x Chromium GEM-X Single Cell 5' v3, 10x Chromium Next GEM Single Cell 3', 10x Chromium Next GEM Single Cell 3' 3.1, 10x Chromium Next GEM Single Cell 5' v1.1, 10x Chromium Next GEM Single Cell 5' v2, 10x Chromium Next GEM Single Cell ATAC v2, 10x Chromium Single Cell Human BCR, 10x Chromium Single Cell Human TCR, 10x GEM-X Flex Gene Expression Human, 10x GEM-X Universal 5' Gene Expression v3, CEL-Seq2, Chromium Next GEM Single Cell ATAC v1.1, Fluidigm C1 HT, NEBNext Human Immune Sequencing Kit, NEBNext Ultra II Directional RNA Library, Nextera XT, Nextera XT DNA, QIAseq miRNA Library, SMART-Seq Human BCR with UMI, SMART-Seq Human TCR with UMI, SMART-Seq v4 Ultra Low Input RNA, SMARTer Stranded Total RNA v2, Takara Human BCR profiling for Illumina, Takara Human TCR profiling for Illumina, Takara Human TCRv2 profiling for Illumina, Takara Human scTCR profiling for Illumina, TruSeq Stranded mRNA, custom DASH-treatment, in-house library prep","","","True","","","","","string" -"datasetType","High-level classification of dataset entity distinguishing between datasets compiled for a specific publication or as a general data resource.","experimental, publication","","","True","","","","","string" -"acknowledgmentStatement","A Dataset-specific attribute specifying the path to the wiki subpage within the ARK Portal - backend project that contains the acknowledgement statement that must be included in publications using data from the given dataset as a stipulation of the conditions of use.","syn26710600/wiki/619685","","","True","","","","","string" -"datasetDescription","A Dataset-specific attribute specifying the synID of the folder that contains a wiki write-up of the dataset description. 
This wiki content will be surfaced on the ARK Portal frontend site.","","","","True","","","","regex search ^syn[0-9]{8} error","string" -"ARKRelease","A Dataset-specific attribute specifying the ARK Portal release in which this dataset was first made available to the public.","1.0, 2.0, 2024.06.R1, 2024.07.R1, 2024.08.R1, 2024.09.R1, 2024.10.R1, 2024.12.R1, 2025.01.R1, 2025.02.R1, 2025.03.R1, 2025.04.R1, 2025.05.R1, 2025.06.R1, 2025.07.R1, 2025.08.R1, 2025.09.R1, 2025.10.R1, 2025.11.R1, 2025.12.R1","","","True","","","","","string" -"RObjectClass","Rds files store R objects, one per file. This label details the class of the R object saved to the Rds file or other similar file types.","ROCR prediction.object, Seurat object, SummarizedExperiment, Symphony reference, data.frame, list, matrix, sparse matrix, vector","","","False","","","","","string" -"diagnosis","A high-level classifier indicating the disease status of an individual.","At-Risk RA, Not Applicable, OA, RA, SLE, Sjogren's disease, control, cutaneous lupus erythematosus, dermatomyositis, discoid lupus erythematosus, lupus nephritis, psoriasis, psoriatic arthritis, scleroderma, unknown, vitiligo","","","True","","","","list like error","string" -"DOID","Disease ontology identifier associated with `diagnosis`. Attribute values are applied by ARK Portal data managers.","","","","True","","","","regex search ^DOID error","string" -"libraryID","A library label or name, unique within an experiment, used to distinguish sequencing libraries.","","","","True","","","","","string" -"associatedCodeURL","A URL to the repository where associated code is available.","","","","False","","","","","string" -"FMAID","Functional Model of Anatomy ontology ID corresponding to the anatomical site and origin of the biospecimen. 
This attribute is assigned by ARK Portal data managers.","","","","True","","","https://bioportal.bioontology.org/ontologies/FMA/?p=summary","int error","integer" -"BRENDA","BRENDA Tissue and Enzyme Source Ontology ID (BTO) corresponding to the `biospecimenType`. Attribute values are applied by ARK Portal data managers.","","","","True","","","https://bioportal.bioontology.org/ontologies/BTO","regex search ^BTO error","string" -"skinSiteStatus","Disease manifestation status of skin biospecimen.","healthy control, lesional, lesional proximal, non-lesional","","","True","","","","","string" -"salivaCollectionProcedure","Classification of saliva collection procedure. This is a conditionally dependent attribute triggered for saliva `biospecimenType`.","stimulated, unstimulated","","","True","","","","","string" -"synovialCollectionProcedure","Classification of procedure for synovial tissue collection.","arthroplasty, biopsy, synovectomy, unknown","","","True","","","","","string" -"publicationType","General classification of publication.","correction, peer-reviewed, pre-print","","","True","","","","","string" -"title","Title of the publication.","","","","True","","","","","string" -"journal","Journal in which the publication was released","","","","True","","","","","string" -"year","Year (YYYY) in which the paper was published.","","","","True","","","","regex search [1-2][0-9]{3} error","string" -"publicationDate","The publication date extracted from PubMed database","","","","True","","","","","string" -"PMID","PubMed(R) Identifier","","","","True","","","","regex search ^PMID error","string" -"DOI","Digital object identifier","","","","True","","","","","string" -"anatomicalSite","The anatomical site, i.e., location on or within the body, from which the biospecimen was collected. 
This attribute is required depending on the value selected for biospecimenType.","left 2nd MCP joint, left ankle joint, left hip joint, left knee joint, left wrist joint, other site, right 1st MTP joint, right 2nd MCP joint, right 2nd MTP joint, right 3rd MCP joint, right ankle joint, right hip joint, right knee joint, right wrist joint, unknown","","","True","","","","","string" -"vitiligoPattern","A high-level classification of vitiligo based on the distribution and patterning of lesions across the body.","mixed, non-segmental, segmental, unclassified","","","False","","","","","string" -"vitiligoPhenotype","Classification of vitiligo lesions which correlate with autoimmune activity and result in specific skin and depigmentation manifestations at the lesion site. If N/A please select 'none'. Multiple selections can be provided in a comma-delimited list.","active, confetti, inflammatory, none, trichrome","","","True","","","","list like error","string" -"psoriasisType","General type classification of psoriasis disease manifestation.","erythrodermic, guttate, inverse, plaque, pustular","","","False","","","","","string" -"ageDiagnosis","Age at which subject was diagnosed with `diagnosis`. If providing this value be sure the unit matches that used for `age`.","","","","False","","","","num error","number" -"age","Age at which subject was enrolled in study or age at corresponding visit and data collection event. 
If value unknown, enter '-1'.","","","","True","","","","num error","number" -"ageUnits","The unit of measure used for `ageEnrollment` and `ageDiagnosis`","months, years","","","True","","","","","string" -"sex","A textual description of a person's sex at birth.","female, intersex, male, unknown","","","True","","","","","string" -"height","Standing height of subject.","","","","True","","","","num error","number" -"race","A textual description of a person's race.","American Indian or Alaska Native, Asian, Black or African American, Hispanic, Mixed Race, Native Hawaiian or Other Pacific Islander, White, other, unknown","","","False","","","","","string" -"ethnicity","The ethnicity of a person.","Hispanic or Latino, Not Hispanic or Latino, unknown","","","False","","","","","string" -"heightUnits","Unit of measure of value provided for `height`.","centimeters, feet, inches, meters","","","True","","","","","string" -"weight","Weight of subject. If value unknown, enter '-1'.","","","","True","","","","num error","number" -"weightUnits","Abbreviated unit of measure of value provided for `weight`.","g, kg, lb, oz","","","True","","","","","string" -"comorbidities","Any diseases or medical condition that is simultaneously present in addition to `diagnosis`.","Hashimoto's Thyroiditis, autoimmune thyroid disease, cardiovascular disease, diabetes, inflammatory bowel disease, multiple sclerosis, other, psoriasis, psoriatic arthritis, pulmonary disease, rheumatoid arthritis, systemic lupus erythematosus","","","False","","","","list like error","string" -"diabetesType","Type of diabetes mellitus.","gestational, type 1, type 2, unknown","","","False","","","","","string" -"VIDA","Vitiligo Disease Activity Score, a six-point scale that evaluates the activity of vitiligo","","","","False","","","","","string" -"VASI","Total body Vitiligo Area Severity Index, a measure of the percentage of vitiligo involvement across the body calculated in terms of hand 
units.","","","","False","","","","","string" -"VETI","Vitiligo Extent Tensity Index measures the extent of vitiligo by a numerical score and combines analysis of extensity and severity of vitiligo to produce a constant and reproducible number.","","","","False","","","https://doi.org/10.5826/dpc.0404a18","num error","number" -"PASI","Psoriasis Area and Severity Index, is a measurement of the discoloration, thickness, scaling, and coverage of psoriasis plaques.","","","","False","","","","","string" -"CDASI","Cutaneous Dermatomyositis Disease Area and Severity Index, is a measurement used to characterize cutaneous dermatomyositis severity.","","","","False","","","","","string" -"percentCellViability","A measure of the proportion of viable cells within a cell suspension. Scale is 0-100.","","","","True","","","","inRange 50 100 error","integer" -"inputCellCount","An estimate of the number of cells expected to be sequenced in a library. Software that process single-cell sequencing data often include options for users to specify this value to improve processing results.","","","","True","","","","int error","integer" -"totalReads","Total number of reads sequenced from the library.","","","","True","","","","int error","integer" -"sequencingSaturation","A measure of the fraction of library complexity that was sequenced in a library. This metric quantifies the fraction of reads originating from an already-observed UMI. More specifically, this is the fraction of confidently mapped, valid cell-barcode, valid UMI reads that are non-unique. 
Scale is 0-1.","","","","False","","","https://kb.10xgenomics.com/hc/en-us/articles/115005062366-What-is-sequencing-saturation","inRange 0 1 error","number" -"ImmPortAccession","Accession to corresponding information in ImmPort.","","","","False","","","","regex search ^SDY error","string" -"FACSPopulation","A description of the marker gating strategy used to derive the population cells with FACS.","","","","True","","","","","string" -"cellOntologyID","Cell Ontology CL identifier that best describes a biopsecimen used to generate data, e.g., CL:0000084 for T cell.","","","","False","","","","regex search ^CL: error","string" -"cellType","The cell type name from Cell Ontology for the corresponding CL identifier.","","","","False","","","","","string" -"userDefinedCellType","User-defined label of a cell type. Contributors are provided this option to use preferred or custom cell type labels that may vary from options and standards set by Cell Ontology.","","","","False","","","","","string" -"krennInflammatory","A standardized, semi-quantitative measure of the degree of inflammatory infiltrate in synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"krennLining","A standardized, semi-quantitative measure of the degree of hyperplasia/enlargement of the synovial lining layer of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. 
If value unknown, use '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"krennStroma","A standardized, semi-quantitative measure of stromal cell density of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"krennSynovitisScore","The Krenn Synovitis Score (KSS) is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"datasetStatus","A categorical label indicating the status of an ARK Portal dataset. 
This is applied to improve downstream management of datasets as well as various ETL workflows.","deprecated, released, test, under peer review, unreleased","","","True","","","","","string" -"sampleProcessingBatch","A label indicating batching of sample processing or preparation that occurs prior to data collection.","","","","False","","","","","string" -"dataCollectionBatch","A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.","","","","False","","","","","string" -"metadataStandards","Metadata standards used to generate the metadata","ARK data model, user-defined","","","False","","","","","string" -"skinSunExposure","For skin-based biospecimen, this attribute indicates whether the sample was collected from an anatomical site that does or does not receive routine sun exposure.","not sun exposed, sun exposed","","","False","","","","","string" -"sequencingSpikeIn","Pre-made sequencing libraries may be added to your sequencing run to improve sequencing results. If such a 'spike-in' library was used, this attribute specifies the name of the library and the percentage of the spike-in library used in the sequencing run.","","","","False","","","","","string" -"librarySpikeIn","Pre-made collections of nucleic acid fragments can be added to samples during the library construction process to improve downstream quantification and statistical analyses. If such a 'spike-in' was used, this attribute specifies the name and manufacturer of the spike-in.","","","","False","","","","","string" -"readLength","The number of base pairs (bp) sequenced for reads in a fastq file.","","","","False","","","","int error","integer" -"AMP RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","programPhase","","False","","","","","" -"plateID","An identifier assigned to a multi-well plate. 
Certain data types and assays profile samples using multi-well plates. Knowing which samples were profiled on each plate is important for establishing sample provenance, finding the right data files for a specific set of samples, as well as downstream exploratory data analysis and QC work particularly regarding identification and correction of batch effects.","","","","False","","","","","string" -"PMCID","Pubmed Central Identifier, formatted as a compact URI string that will automatically resolve into a URL based on the Synapse bioregistry; e.g., pmc:PMCxxxxxxxx","","","","False","","","","regex search ^pmc:PMC[0-9]{8} error","string" -"processedDataType","A label used for file annotations to provide a brief description of the processed data file.","barcode counts, differential expression results, epigenomic peaks, gene counts","","","False","","","","list like error","string" -"eventCount","The total number of events detected and captured in the corresponding FCS file. In cytometry, an event corresponds to particles detected and measured by the instrument. This number corresponds to the number of rows in the FCS file and is expected to be a whole integer number.","","","","False","","","","int error","integer" -"notes","Please use this attribute to captured pertinent details not otherwise captured. This is an unstructured text entry, please be concise.","","","","False","","","","","string" -"species","The genus species of sample or subject origin.","Homo sapiens","","","True","","","","","string" -"treatment","A short descriptive label indicating what experimental treatments have been applied to a biospecimen before data capture. 
Please include the word 'control' to indicate specific treatments that are intended to serve as a control group.","","","","True","","","","","string" -"treatmentTimepoint","Where applicable, specify the timepoint relative to treatment to distinguish different sets of samples that have undergone the same treatment but for different lengths of time. REQUIRED: please specify the unit of time, e.g., secs, mins, hrs, days, etc.","","","","False","","","","","string" -"slideID","A distinct label or name, unique within an experiment, assigned to an imaging slides.","","","","True","","","","","string" -"altSampleID","An alternate identifier for a sample. With some assays there can be default or alternate sample identifiers entered into the data collection software. If you will be uploading data collected with an alternate identifier please provide that here. In some cases, the ARK BDM team will use this field to capture original but out-dated identifiers for samples.","","","","False","","","","","string" -"sampleCollectionBatch","A label indicating batching of sample collection or experiment execution that occurs prior to data collection.","","","","False","","","","","string" -"associatedAccession","This is a File and Dataset annotation attribute indicating additional accessions (i.e., unique identifiers) associated with the data when the data has also been submitted to or can be found in other repositories such as GEO, SRA, dbGaP, etc.","","","","False","","","","list like error","string" +"Attribute","Description","Valid Values","DependsOn","Required","Parent","Source","Validation Rules","columnType","Format","Minimum","Maximum","Pattern" +"Component","A high-level attribute for grouping attributes into templates.","","","True","","","","string","","","","" +"fileFormat","Standard file format name or file extension","bai, bam, bed, bim, csv, czi, docx, dose, erate, fam, fastq, fcs, geojson, h5, h5ad, info, mcd, mtx, parquet, pdf, py, rds, rec, svs, tbi, tgz, tsv, 
txt, vcf, xls, xlsx, zip","","True","","","","string","","","","" +"program","Name of the funding program that supported the generation of data and associated files","AMP AIM, AMP RA/SLE, Community Contribution","","True","","","","string","","","","" +"project","A sub-level attribute of `program` specifying a research initiative working to investigate particular hypotheses.","AIM for RA, ELLIPSS, LOCKIT, RA, SLE, STAMP, UMass V-CoRT","","True","","","list like error","string_list","","","","" +"programPhase","A label noting which AMP RA/SLE program phase generated the data.","I, II","","True","","","list like error","string_list","","","","" +"resourceType","High-level classification of the file content","code, experimental data, figure, metadata","","True","","","","string","","","","" +"dataType","High-level classification of the type of data contained in the file, loosely related to the experimental method or biological entity that is being profiled. Select all that apply using a comma-delimited list, though in most cases only a single label is expected. For multimodal datasets with concomitant profiling of biospecimen include 'multimodal'.","cytometry, epigenomics, genomics, histology, immune repertoire profiling, immunostaining, lipidomics, metabolomics, microbiome, multimodal, proteomics, transcriptomics","","True","","","list like error","string_list","","","","" +"dataSubtype","General classification to differentiate between omics profiling modalities. If N/A please select 'none'. Multiple selections can be provided in a comma-delimited list, however this is largely only expected in the context of Datasets and files that contain integrated experimental data spanning multiple types.","bulk, none, pseudobulk, single-cell, single-nucleus, spatial","","True","","","list like error","string_list","","","","" +"dataLevel","Level of data processing applied to file. 
Levels refer to pre-defined standards of processing for the given assay.","1, 2, 3, 4, 5","","True","","","","string","","","","" +"assay","The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.","ASAPSeq, CE-MS, CITESeq, CosMX, CyTOF, GenePS SeqFISH, H&E, LC-MS/MS, NULISA, Olink Explore HT, Olink Flex, Olink Focus, Olink Reveal, Olink Target 48, Olink Target 96, RNASeq, SNP array, SomaScan, VDJSeq, Visium, WES, WGS, Xenium, feature barcode sequencing, flow cytometry, imaging mass cytometry, imaging mass spectrometry, kiloplex, multiplexed ELISA, scRNASeq, scVDJSeq, serial IHC, snATACSeq, snRNASeq","","True","","","list like error","string_list","","","","" +"10xProbeSetReference","Name of probe set used in 10x Chromium Flex, Xenium, or Visium data. If custom modified probe set was used the probe reference should be included as metadata accompanying the experimental data files.","Flex Human Transcriptome Probe Set v1.0.1, Flex Human Transcriptome Probe Set v1.1.0, Visium Human Transcriptome v1, Visium Human Transcriptome v2, custom probe set","","True","","","","string","","","","" +"platform","The specific version (manufacturer, model, etc.) of a technology that is used to carry out a laboratory or computational experiment. Specify where applicable for experimental data files, else enter 'none'. 
In most cases only a single label is expected, however multiple selections can be provided in comma-delimited list where applicable e.g., for 10x Genomics fastq files please specify both the 10x instrument and the sequencing platform.","BD FACSAria Fusion cell sorter, BD FACSAria III, BD FACSCanto, BD FACSCanto II, BD FACSDiscover A8, BD FACSDiscover S8, BD FACSLyric Clinical, BD FACSMelody, BD FACSymphony S6, BD LSRFortessa, Chromium Controller, Chromium GEM-X Single Cell 3' Chip v4, Chromium Next GEM Chip G, Chromium Next GEM Chip H, Chromium Next GEM Chip K, Chromium Next GEM Chip M, Chromium Next GEM Chip Q, Chromium X, Chromium Xo, Chromium iX, CyTOF XT, Cytek Aurora, Cytek Aurora Evo, Fluidigm BioMark, GEM-X Flex Gene Expression Chip, GEM-X OCM 5' Chip, Helios Mass Cytometer, Hyperion, Illumina HiSeq 2500, Illumina HiSeq X Ten, Illumina NextSeq 500, Illumina NovaSeq 6000, Illumina NovaSeq X, Not Applicable, Olink Signature Q100, Sony MA900, Thermo Fisher Attune CytPix, Thermo Fisher Attune NxT, Thermo Fisher Attune Xenith, Visium CytAssist, Xenium, none, unknown","","True","","","list like error","string_list","","","","" +"specimenModality","Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens","multispecimen, single specimen, unknown","","True","","","","string","","","","" +"individualID","Unique identifier assigned to each study participant. For multi-specimen data files provide all IDs in a comma-separated list.","","","True","","","#ClinicalMetadataTemplate unique error^^#BiospecimenMetadataTemplate str^^list like error","string_list","","","","" +"biospecimenID","A unique identifier assigned to specimens collected from study participants. 
For multi-specimen data files provide all IDs in a comma-separated list.","","","True","","","#BiospecimenMetadataTemplate unique error^^list like error","string_list","","","","" +"parentBiospecimenID","The biospecimenID associated with the originating biospecimen for derived or child biospecimens.","","","False","","","","string","","","","" +"biospecimenType","A label indicating the biological material collected for experimentation and data collection. Where applicable, provide all types in a comma-separated list.","PBMCs, cell line, fibroblast-like synoviocyte, kidney biopsy, none, plasma, primary cell culture, saliva, salivary gland, serum, skin biopsy, skin swab, stool, suction blister cells, suction blister fluid, synovial fluid, synovial tissue, total leukocytes, urine, uvea, whole blood","","True","","","#BiospecimenMetadataTemplate str^^#InVitroBiospecimenMetadataTemplate^^list like error","string_list","","","","" +"primaryCellSource","A label indicating the biological source material from which a primary cell culture was derived.","PBMCs, kidney, pannus-derived dermis, pannus-derived epidermis, salivary gland, synovial tissue, total leukocytes, urine, uvea, whole blood","","True","","","","string","","","","" +"metadataType","A label further classifying the content of metadata resource.","assay, biospecimen, cell coordinates, data dictionary, file manifest, medication, other, phenotype, protocol, single-cell metadata, target panel, template, tissue microarray map, tissue multiarray map, user manual","","True","","","","string","","","","" +"codingLanguage","The coding, aka programming, language(s) contained in the file marked with `resourceType = code`. 
Select all that apply.","C, C#, C++, Fortan, Java, Julia, Matlab, Python, R, Ruby, SAS","","True","","","","string","","","","" +"visitID","Ordinal ID distinguishing different patient visits.","","","True","","","","string","","","","" +"softwareAndVersion","Relevant software and version used to generate the data file.","BD FACSDiva 8.0.1, Cell Ranger 9.0.1, Cell Ranger ATAC v1.1.0, Cell Ranger v3.0.0, Cell Ranger v3.0.1, Cell Ranger v3.0.2, Cell Ranger v3.1.0, Cell Ranger v4.0.0, Cell Ranger v5.0.0, Cell Ranger v5.0.1, Cell Ranger v6.0.0, Cell Ranger v6.0.1, Cell Ranger v6.0.2, Cell Ranger v6.1.0, Cell Ranger v6.1.1, Cell Ranger v6.1.2, Cell Ranger v7.0.0, Cell Ranger v7.0.1, Cell Ranger v7.1.0, Cell Ranger v7.2.0, Cell Ranger v8.0.0, Cell Ranger v8.0.1, Cell Ranger v9.0.0, Space Ranger 3.0.0, Space Ranger 3.0.1, Space Ranger 3.1.0, Space Ranger 3.1.1, Space Ranger 3.1.2, Space Ranger 3.1.3, demuxlet","","False","","","","string","","","","" +"targetPanel","A unique or established human-readable name assigned to the panel of targets profiled in the experiment. For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.","","","True","","","","string","","","","" +"targetPanelSize","The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).","","","False","","","int error","integer","","","","" +"nucleicAcidSource","The source of the nucleic acid used as input for sequencing library fragments. 
Select all that apply, though in most cases only a single label is expected.","BCR mRNA, CRISPR protospacer feature barcode, TCR mRNA, Tn5-accessible gDNA, antigen capture barcode, gDNA, globin-depleted RNA, intracellular protein feature barcode, multiplexing oligo, poly(A) RNA, rRNA-depleted RNA, surface protein feature barcode","","True","","","list like error","string_list","","","","" +"biospecimenSubtype","Biospecimen status before sample is processed into a scRNA-seq library. Several scRNA-seq technologies support a variety of sample processing methods which can introduce sources of technical variation.","FFPE tissue, PFA-fixed tissue, cell or tissue lysate, cell suspension, flow-sorted cells, fresh tissue, frozen tissue, nuclei suspension, supernatant","","False","","","","string","","","","" +"cellRangerOutput","10x Genomics Cell Ranger software outputs several different count results and formats, some with different processing applied. This label distinguishes between these types and is particularly helpful when multiple files are uploaded with the same name, e.g., barcodes.tsv.gz","Not Applicable, filtered MEX, filtered_feature_bc_matrix, filtered_peak_bc_matrix, raw MEX, raw_feature_bc_matrix, raw_peak_bc_matrix","","True","","","","string","","","","" +"alignmentReference","The genomic/transcriptomic reference used for performing read alignment against.","10x Cell Ranger Human GRCh38 2020-A, 10x Cell Ranger Human GRCh38 2024-A, GRCh38, modified GRCh38, unknown, vdj_GRCh38_alts_ensembl-4.0.0","","True","","","","string","","","","" +"libraryPrepMethod","Sequencing library preparation method or kit used to create the library.
If no commercially available kit was used, please select 'in-house library prep'.","10x Chromium Fixed RNA Human Transcriptome, 10x Chromium GEM-X Single Cell 3' v4, 10x Chromium GEM-X Single Cell 5' v3, 10x Chromium Next GEM Single Cell 3', 10x Chromium Next GEM Single Cell 3' 3.1, 10x Chromium Next GEM Single Cell 5' v1.1, 10x Chromium Next GEM Single Cell 5' v2, 10x Chromium Next GEM Single Cell ATAC v2, 10x Chromium Single Cell Human BCR, 10x Chromium Single Cell Human TCR, 10x GEM-X Flex Gene Expression Human, 10x GEM-X Universal 5' Gene Expression v3, CEL-Seq2, Chromium Next GEM Single Cell ATAC v1.1, Fluidigm C1 HT, NEBNext Human Immune Sequencing Kit, NEBNext Ultra II Directional RNA Library, Nextera XT, Nextera XT DNA, QIAseq miRNA Library, SMART-Seq Human BCR with UMI, SMART-Seq Human TCR with UMI, SMART-Seq v4 Ultra Low Input RNA, SMARTer Stranded Total RNA v2, Takara Human BCR profiling for Illumina, Takara Human TCR profiling for Illumina, Takara Human TCRv2 profiling for Illumina, Takara Human scTCR profiling for Illumina, TruSeq Stranded mRNA, custom DASH-treatment, in-house library prep","","True","","","","string","","","","" +"datasetType","High-level classification of dataset entity distinguishing between datasets compiled for a specific publication or as a general data resource.","experimental, publication","","True","","","","string","","","","" +"acknowledgmentStatement","A Dataset-specific attribute specifying the path to the wiki subpage within the ARK Portal - backend project that contains the acknowledgement statement that must be included in publications using data from the given dataset as a stipulation of the conditions of use.","syn26710600/wiki/619685","","True","","","","string","","","","" +"ARKRelease","A Dataset-specific attribute specifying the ARK Portal release in which this dataset was first made available to the public.","1.0, 2.0, 2024.06.R1, 2024.07.R1, 2024.08.R1, 2024.09.R1, 2024.10.R1, 2024.12.R1, 2025.01.R1, 2025.02.R1, 
2025.03.R1, 2025.04.R1, 2025.05.R1, 2025.06.R1, 2025.07.R1, 2025.08.R1, 2025.09.R1, 2025.10.R1, 2025.11.R1, 2025.12.R1","","True","","","","string","","","","" +"RObjectClass","Rds files store R objects, one per file. This label details the class of the R object saved to the Rds file or other similar file types.","ROCR prediction.object, Seurat object, SummarizedExperiment, Symphony reference, data.frame, list, matrix, sparse matrix, vector","","False","","","","string","","","","" +"diagnosis","A high-level classifier indicating the disease status of an individual.","At-Risk RA, Not Applicable, OA, RA, SLE, Sjogren's disease, control, cutaneous lupus erythematosus, dermatomyositis, discoid lupus erythematosus, lupus nephritis, psoriasis, psoriatic arthritis, scleroderma, unknown, vitiligo","","True","","","list like error","string_list","","","","" +"libraryID","A library label or name, unique within an experiment, used to distinguish sequencing libraries.","","","True","","","","string","","","","" +"associatedCodeURL","A URL to the repository where associated code is available.","","","False","","","","string","uri","","","" +"FMAID","Foundational Model of Anatomy ontology ID corresponding to the anatomical site and origin of the biospecimen. This attribute is assigned by ARK Portal data managers.","","","True","","https://bioportal.bioontology.org/ontologies/FMA/?p=summary","int error","integer","","","","" +"skinSiteStatus","Disease manifestation status of skin biospecimen.","healthy control, lesional, lesional proximal, non-lesional","","True","","","","string","","","","" +"salivaCollectionProcedure","Classification of saliva collection procedure.
This is a conditionally dependent attribute triggered for saliva `biospecimenType`.","stimulated, unstimulated","","True","","","","string","","","","" +"synovialCollectionProcedure","Classification of procedure for synovial tissue collection.","arthroplasty, biopsy, synovectomy, unknown","","True","","","","string","","","","" +"publicationType","General classification of publication.","correction, peer-reviewed, pre-print","","True","","","","string","","","","" +"title","Title of the publication.","","","True","","","","string","","","","" +"journal","Journal in which the publication was released","","","True","","","","string","","","","" +"publicationDate","The publication date extracted from PubMed database","","","True","","","","string","","","","" +"DOI","Digital object identifier","","","True","","","","string","uri-reference","","","" +"anatomicalSite","The anatomical site, i.e., location on or within the body, from which the biospecimen was collected. This attribute is required depending on the value selected for biospecimenType.","left 2nd MCP joint, left ankle joint, left hip joint, left knee joint, left wrist joint, other site, right 1st MTP joint, right 2nd MCP joint, right 2nd MTP joint, right 3rd MCP joint, right ankle joint, right hip joint, right knee joint, right wrist joint, unknown","","True","","","","string","","","","" +"vitiligoPattern","A high-level classification of vitiligo based on the distribution and patterning of lesions across the body.","mixed, non-segmental, segmental, unclassified","","False","","","","string","","","","" +"vitiligoPhenotype","Classification of vitiligo lesions which correlate with autoimmune activity and result in specific skin and depigmentation manifestations at the lesion site. If N/A please select 'none'. 
Multiple selections can be provided in a comma-delimited list.","active, confetti, inflammatory, none, trichrome","","True","","","list like error","string_list","","","","" +"psoriasisType","General type classification of psoriasis disease manifestation.","erythrodermic, guttate, inverse, plaque, pustular","","False","","","","string","","","","" +"ageDiagnosis","Age at which subject was diagnosed with `diagnosis`. If providing this value be sure the unit matches that used for `age`.","","","False","","","num error","number","","","","" +"age","Age at which subject was enrolled in study or age at corresponding visit and data collection event. If value unknown, enter '-1'.","","","True","","","num error","number","","","","" +"ageUnits","The unit of measure used for `age` and `ageDiagnosis`","months, years","","True","","","","string","","","","" +"sex","A textual description of a person's sex at birth.","female, intersex, male, unknown","","True","","","","string","","","","" +"height","Standing height of subject.","","","True","","","num error","number","","","","" +"race","A textual description of a person's race.","American Indian or Alaska Native, Asian, Black or African American, Hispanic, Mixed Race, Native Hawaiian or Other Pacific Islander, White, other, unknown","","False","","","","string","","","","" +"ethnicity","The ethnicity of a person.","Hispanic or Latino, Not Hispanic or Latino, unknown","","False","","","","string","","","","" +"heightUnits","Unit of measure of value provided for `height`.","centimeters, feet, inches, meters","","True","","","","string","","","","" +"weight","Weight of subject.
If value unknown, enter '-1'.","","","True","","","num error","number","","","","" +"weightUnits","Abbreviated unit of measure of value provided for `weight`.","g, kg, lb, oz","","True","","","","string","","","","" +"comorbidities","Any diseases or medical condition that is simultaneously present in addition to `diagnosis`.","Hashimoto's Thyroiditis, autoimmune thyroid disease, cardiovascular disease, diabetes, inflammatory bowel disease, multiple sclerosis, other, psoriasis, psoriatic arthritis, pulmonary disease, rheumatoid arthritis, systemic lupus erythematosus","","False","","","list like error","string_list","","","","" +"diabetesType","Type of diabetes mellitus.","gestational, type 1, type 2, unknown","","False","","","","string","","","","" +"VIDA","Vitiligo Disease Activity Score, a six-point scale that evaluates the activity of vitiligo","","","False","","","","string","","","","" +"VASI","Total body Vitiligo Area Severity Index, a measure of the percentage of vitiligo involvement across the body calculated in terms of hand units.","","","False","","","","string","","","","" +"VETI","Vitiligo Extent Tensity Index measures the extent of vitiligo by a numerical score and combines analysis of extensity and severity of vitiligo to produce a constant and reproducible number.","","","False","","https://doi.org/10.5826/dpc.0404a18","num error","number","","","","" +"PASI","Psoriasis Area and Severity Index, is a measurement of the discoloration, thickness, scaling, and coverage of psoriasis plaques.","","","False","","","","string","","","","" +"CDASI","Cutaneous Dermatomyositis Disease Area and Severity Index, is a measurement used to characterize cutaneous dermatomyositis severity.","","","False","","","","string","","","","" +"inputCellCount","An estimate of the number of cells expected to be sequenced in a library. 
Software that processes single-cell sequencing data often includes options for users to specify this value to improve processing results.","","","True","","","int error","integer","","","","" +"totalReads","Total number of reads sequenced from the library.","","","True","","","int error","integer","","","","" +"FACSPopulation","A description of the marker gating strategy used to derive the population of cells with FACS.","","","True","","","","string","","","","" +"cellType","The cell type name from Cell Ontology for the corresponding CL identifier.","","","False","","","","string","","","","" +"userDefinedCellType","User-defined label of a cell type. Contributors are provided this option to use preferred or custom cell type labels that may vary from options and standards set by Cell Ontology.","","","False","","","","string","","","","" +"krennInflammatory","A standardized, semi-quantitative measure of the degree of inflammatory infiltrate in synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"krennLining","A standardized, semi-quantitative measure of the degree of hyperplasia/enlargement of the synovial lining layer of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"krennStroma","A standardized, semi-quantitative measure of stromal cell density of synovial specimen.
This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"krennSynovitisScore","The Krenn Synovitis Score (KSS) is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"datasetStatus","A categorical label indicating the status of an ARK Portal dataset. This is applied to improve downstream management of datasets as well as various ETL workflows.","deprecated, released, test, under peer review, unreleased","","True","","","","string","","","","" +"sampleProcessingBatch","A label indicating batching of sample processing or preparation that occurs prior to data collection.","","","False","","","","string","","","","" +"dataCollectionBatch","A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.","","","False","","","","string","","","","" +"metadataStandards","Metadata standards used to generate the metadata","ARK data model, user-defined","","False","","","","string","","","","" +"skinSunExposure","For skin-based biospecimen, this attribute indicates whether the sample was collected from an anatomical site that does or does not receive routine sun exposure.","not sun exposed, sun exposed","","False","","","","string","","","","" +"sequencingSpikeIn","Pre-made sequencing libraries may be added to your sequencing run to improve sequencing results. 
If such a 'spike-in' library was used, this attribute specifies the name of the library and the percentage of the spike-in library used in the sequencing run.","","","False","","","","string","","","","" +"librarySpikeIn","Pre-made collections of nucleic acid fragments can be added to samples during the library construction process to improve downstream quantification and statistical analyses. If such a 'spike-in' was used, this attribute specifies the name and manufacturer of the spike-in.","","","False","","","","string","","","","" +"readLength","The number of base pairs (bp) sequenced for reads in a fastq file.","","","False","","","int error","integer","","","","" +"AMP RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","programPhase","False","","","","","","","","" +"plateID","An identifier assigned to a multi-well plate. Certain data types and assays profile samples using multi-well plates. Knowing which samples were profiled on each plate is important for establishing sample provenance, finding the right data files for a specific set of samples, as well as downstream exploratory data analysis and QC work particularly regarding identification and correction of batch effects.","","","False","","","","string","","","","" +"processedDataType","A label used for file annotations to provide a brief description of the processed data file.","barcode counts, differential expression results, epigenomic peaks, gene counts","","False","","","list like error","string_list","","","","" +"eventCount","The total number of events detected and captured in the corresponding FCS file. In cytometry, an event corresponds to particles detected and measured by the instrument. 
This number corresponds to the number of rows in the FCS file and is expected to be a whole integer number.","","","False","","","int error","integer","","","","" +"notes","Please use this attribute to capture pertinent details not otherwise captured. This is an unstructured text entry, please be concise.","","","False","","","","string","","","","" +"species","The genus species of sample or subject origin.","Homo sapiens","","True","","","","string","","","","" +"treatment","A short descriptive label indicating what experimental treatments have been applied to a biospecimen before data capture. Please include the word 'control' to indicate specific treatments that are intended to serve as a control group.","","","True","","","","string","","","","" +"treatmentTimepoint","Where applicable, specify the timepoint relative to treatment to distinguish different sets of samples that have undergone the same treatment but for different lengths of time. REQUIRED: please specify the unit of time, e.g., secs, mins, hrs, days, etc.","","","False","","","","string","","","","" +"slideID","A distinct label or name, unique within an experiment, assigned to an imaging slide.","","","True","","","","string","","","","" +"altSampleID","An alternate identifier for a sample. With some assays there can be default or alternate sample identifiers entered into the data collection software. If you will be uploading data collected with an alternate identifier please provide that here.
In some cases, the ARK BDM team will use this field to capture original but out-dated identifiers for samples.","","","False","","","","string","","","","" +"sampleCollectionBatch","A label indicating batching of sample collection or experiment execution that occurs prior to data collection.","","","False","","","","string","","","","" +"associatedAccession","This is a File and Dataset annotation attribute indicating additional accessions (i.e., unique identifiers) associated with the data when the data has also been submitted to or can be found in other repositories such as GEO, SRA, dbGaP, etc.","","","False","","","list like error","string_list","","","","" +"percentCellViability","A measure of the proportion of viable cells within a cell suspension. Scale is 0-100.","","","True","","","inRange 50 100 error","integer","","50","100","" +"sequencingSaturation","A measure of the fraction of library complexity that was sequenced in a library. This metric quantifies the fraction of reads originating from an already-observed UMI. More specifically, this is the fraction of confidently mapped, valid cell-barcode, valid UMI reads that are non-unique. Scale is 0-1.","","","False","","https://kb.10xgenomics.com/hc/en-us/articles/115005062366-What-is-sequencing-saturation","inRange 0 1 error","number","","0","1","" +"publicationSynID","The synID of the corresponding Synapse entity that stores metadata about the publication. This is used to differentiate publication-specific files, often consisting of level 4 processed data and expanded subject metadata, in a publication dataset that also includes raw or minimally processed files from experimental datasets. This provides an easy way to distinguish and select for the publication-specific data from which the research findings were derived. 
When this attribute is used to annotate a Dataset it serves as a way to directly link the Dataset entity with the publication metadata stored in Synapse.","","","False","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"associatedDataset","The synID of a Dataset entity. This serves to link other Synapse entities to Dataset entities. When used to annotate a publication Dataset this attribute should include the synID of each experimental Dataset from which the publication data was derived. Multiple synIDs can be specified using a comma-delimited list.","","","False","","","list like::regex search ^syn[0-9]{8} error","string_list","","","","^syn[0-9]{8}" +"custom10xProbeSetSynID","If custom modified probe sets were used for collecting 10x Chromium Flex scRNA-seq data, then the probe reference files should be grouped as a zip or tar archive and uploaded as metadata. This attribute links the experimental data back to the probe set archive file via a synapse ID.","","","True","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"targetPanelSynID","In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. This attribute links experimental data files to the target panel metadata via the synapse ID of that file.","","","True","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"datasetDescription","A Dataset-specific attribute specifying the synID of the folder that contains a wiki write-up of the dataset description. This wiki content will be surfaced on the ARK Portal frontend site.","","","True","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"DOID","Disease ontology identifier associated with `diagnosis`. 
Attribute values are applied by ARK Portal data managers.","","","True","","","regex search ^DOID error","string","","","","^DOID" +"BRENDA","BRENDA Tissue and Enzyme Source Ontology ID (BTO) corresponding to the `biospecimenType`. Attribute values are applied by ARK Portal data managers.","","","True","","https://bioportal.bioontology.org/ontologies/BTO","regex search ^BTO error","string","","","","^BTO" +"year","Year (YYYY) in which the paper was published.","","","True","","","regex search [1-2][0-9]{3} error","string","","","","[1-2][0-9]{3}" +"PMID","PubMed(R) Identifier","","","True","","","regex search ^PMID error","string","","","","^PMID" +"ImmPortAccession","Accession to corresponding information in ImmPort.","","","False","","","regex search ^SDY error","string","","","","^SDY" +"cellOntologyID","Cell Ontology CL identifier that best describes a biospecimen used to generate data, e.g., CL:0000084 for T cell.","","","False","","","regex search ^CL: error","string","","","","^CL:" +"PMCID","PubMed Central Identifier, formatted as a compact URI string that will automatically resolve into a URL based on the Synapse bioregistry; e.g., pmc:PMCxxxxxxxx","","","False","","","regex search ^pmc:PMC[0-9]{8} error","string","","","","^pmc:PMC[0-9]{8}" diff --git a/ark.metadata_template_primary_keys.csv b/ark.metadata_template_primary_keys.csv new file mode 100644 index 00000000..7db96db3 --- /dev/null +++ b/ark.metadata_template_primary_keys.csv @@ -0,0 +1,13 @@ +template,primary_key +BiospecimenMetadataTemplate,biospecimenID +InVitroBiospecimenMetadataTemplate,biospecimenID +ClinicalMetadataTemplate,individualID +BulkATAC-seqAssayMetadataTemplate, +BulkRNA-seqAssayMetadataTemplate, +CyTOFAssayMetadataTemplate, +PublicationMetadataTemplate, +ScRNASeqAssayMetadataTemplate, +SnATAC-seqAssayMetadataTemplate, +SnRNASeqAssayMetadataTemplate, +OlinkAssayMetadataTemplate,plateID +SpatialImagingAssayMetadataTemplate,slideID diff --git 
a/model_contexts/biospecimen/ark.biospecimen_context.csv b/model_contexts/biospecimen/ark.biospecimen_context.csv index 69c2185d..b17f2f70 100644 --- a/model_contexts/biospecimen/ark.biospecimen_context.csv +++ b/model_contexts/biospecimen/ark.biospecimen_context.csv @@ -1,17 +1,20 @@ -"Attribute","Description","Valid Values","DependsOn","Properties","Required","Parent","DependsOn Component","Source","Validation Rules","columnType" -"Biospecimen Metadata Template","A general template outlining metadata to be collected for biospecimen profiled in a dataset.","","Component, program, project, individualID, biospecimenID, parentBiospecimenID, altSampleID, biospecimenType, biospecimenSubtype, notes, sampleCollectionBatch","","","","","","","" -"In Vitro Biospecimen Metadata Template","A template outlining metadata to be collected for biospecimen used for an in vitro experiment.","","Component, program, project, individualID, biospecimenID, parentBiospecimenID, altSampleID, biospecimenType, biospecimenSubtype, treatment, treatmentTimepoint, notes, sampleCollectionBatch","","","","","","","" -"skin swab","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","skinSiteStatus, anatomicalSite","","","","","","","" -"skin biopsy","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","skinSiteStatus, anatomicalSite","","","","","","","" -"suction blister cells","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","skinSiteStatus, anatomicalSite","","","","","","","" -"suction blister fluid","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","skinSiteStatus, anatomicalSite","","","","","","","" -"saliva","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","salivaCollectionProcedure","","","","","","","" 
-"synovial tissue","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","synovialCollectionProcedure, anatomicalSite, krennInflammatory, krennLining, krennStroma, krennSynovitisScore","","","","","","","" -"synovial fluid","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","synovialCollectionProcedure, anatomicalSite","","","","","","","" -"flow-sorted cells","A valid value of 'biospecimenSubtype' that triggers conditional dependencies for additional attributes.","","FACSPopulation, cellOntologyID, cellType, userDefinedCellType","","","","","","","" -"cell suspension","A valid value of 'biospecimenSubtype' that triggers conditional dependencies for additional attributes.","","cellOntologyID, cellType, userDefinedCellType","","","","","","","" -"AMP RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","" -"AMP AIM","Accelerating Medicines Partnership Autoimmune and Immune-Mediated Diseases. 
Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","" -"primary cell culture","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","cellType, cellOntologyID, primaryCellSource","","","","","","","" -"cell line","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","cellType, cellOntologyID","","","","","","","" -"individualID","","","","","FALSE","","","","#BiospecimenMetadataTemplate required^^","string" +"Attribute","Description","Valid Values","DependsOn","Required","Parent","Source","Validation Rules","columnType","Format","Minimum","Maximum","Pattern" +"Biospecimen Metadata Template","A general template outlining metadata to be collected for biospecimen profiled in a dataset.","","Component, program, project, individualID, biospecimenID, parentBiospecimenID, altSampleID, biospecimenType, biospecimenSubtype, notes, sampleCollectionBatch","","","","","","","","","" +"In Vitro Biospecimen Metadata Template","A template outlining metadata to be collected for biospecimen used for an in vitro experiment.","","Component, program, project, individualID, biospecimenID, parentBiospecimenID, altSampleID, biospecimenType, biospecimenSubtype, treatment, treatmentTimepoint, notes, sampleCollectionBatch","","","","","","","","","" +"skin swab","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","skinSiteStatus, anatomicalSite","","","","","","","","","" +"skin biopsy","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","skinSiteStatus, anatomicalSite","","","","","","","","","" +"suction blister cells","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","skinSiteStatus, anatomicalSite","","","","","","","","","" +"suction 
blister fluid","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","skinSiteStatus, anatomicalSite","","","","","","","","","" +"saliva","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","salivaCollectionProcedure","","","","","","","","","" +"synovial tissue","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","synovialCollectionProcedure, anatomicalSite, krennInflammatory, krennLining, krennStroma, krennSynovitisScore","","","","","","","","","" +"synovial fluid","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","synovialCollectionProcedure, anatomicalSite","","","","","","","","","" +"flow-sorted cells","A valid value of 'biospecimenSubtype' that triggers conditional dependencies for additional attributes.","","FACSPopulation, cellOntologyID, cellType, userDefinedCellType","","","","","","","","","" +"cell suspension","A valid value of 'biospecimenSubtype' that triggers conditional dependencies for additional attributes.","","cellOntologyID, cellType, userDefinedCellType","","","","","","","","","" +"AMP RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","","","" +"AMP AIM","Accelerating Medicines Partnership Autoimmune and Immune-Mediated Diseases. 
Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","","","" +"primary cell culture","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","cellType, cellOntologyID, primaryCellSource","","","","","","","","","" +"cell line","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","cellType, cellOntologyID","","","","","","","","","" +"individualID","","","","TRUE","","","","string","","","","" +"biospecimenID","","","","TRUE","","","","string","","","","" +"biospecimenType","","PBMCs, cell line, fibroblast-like synoviocyte, kidney biopsy, none, plasma, primary cell culture, saliva, salivary gland, serum, skin biopsy, skin swab, stool, suction blister cells, suction blister fluid, synovial fluid, synovial tissue, total leukocytes, urine, uvea, whole blood","","TRUE","","","","string","","","","" +"project","","AIM for RA, ELLIPSS, LOCKIT, RA, SLE, STAMP, UMass V-CoRT","","TRUE","","","","string","","","","" diff --git a/model_contexts/biospecimen/ark.biospecimen_model.csv b/model_contexts/biospecimen/ark.biospecimen_model.csv index f537a912..6dbbbebe 100644 --- a/model_contexts/biospecimen/ark.biospecimen_model.csv +++ b/model_contexts/biospecimen/ark.biospecimen_model.csv @@ -1,124 +1,124 @@ -"Attribute","Description","Valid Values","DependsOn","Properties","Required","Parent","DependsOn Component","Source","Validation Rules","columnType" -"Biospecimen Metadata Template","A general template outlining metadata to be collected for biospecimen profiled in a dataset.","","Component, program, project, individualID, biospecimenID, parentBiospecimenID, altSampleID, biospecimenType, biospecimenSubtype, notes, sampleCollectionBatch","","","","","","","" -"In Vitro Biospecimen Metadata Template","A template outlining metadata to be collected for biospecimen used for an in vitro 
experiment.","","Component, program, project, individualID, biospecimenID, parentBiospecimenID, altSampleID, biospecimenType, biospecimenSubtype, treatment, treatmentTimepoint, notes, sampleCollectionBatch","","","","","","","" -"skin swab","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","skinSiteStatus, anatomicalSite","","","","","","","" -"skin biopsy","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","skinSiteStatus, anatomicalSite","","","","","","","" -"suction blister cells","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","skinSiteStatus, anatomicalSite","","","","","","","" -"suction blister fluid","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","skinSiteStatus, anatomicalSite","","","","","","","" -"saliva","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","salivaCollectionProcedure","","","","","","","" -"synovial tissue","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","synovialCollectionProcedure, anatomicalSite, krennInflammatory, krennLining, krennStroma, krennSynovitisScore","","","","","","","" -"synovial fluid","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","synovialCollectionProcedure, anatomicalSite","","","","","","","" -"flow-sorted cells","A valid value of 'biospecimenSubtype' that triggers conditional dependencies for additional attributes.","","FACSPopulation, cellOntologyID, cellType, userDefinedCellType","","","","","","","" -"cell suspension","A valid value of 'biospecimenSubtype' that triggers conditional dependencies for additional attributes.","","cellOntologyID, cellType, userDefinedCellType","","","","","","","" -"AMP 
RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","" -"AMP AIM","Accelerating Medicines Partnership Autoimmune and Immune-Mediated Diseases. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","" -"primary cell culture","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","cellType, cellOntologyID, primaryCellSource","","","","","","","" -"cell line","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","cellType, cellOntologyID","","","","","","","" -"individualID","Unique identifier assigned to each study participant. For multi-specimen data files provide all IDs in a comma-separated list.","","","","FALSE","","","","#BiospecimenMetadataTemplate required^^","string" -"Component","A high-level attribute for grouping attributes into templates.","","","","True","","","","","string" -"fileFormat","Standard file format name or file extension","bai, bam, bed, bim, csv, czi, docx, dose, erate, fam, fastq, fcs, geojson, h5, h5ad, info, mcd, mtx, parquet, pdf, py, rds, rec, svs, tbi, tgz, tsv, txt, vcf, xls, xlsx, zip","","","True","","","","","string" -"publicationSynID","The synID of the corresponding Synapse entity that stores metadata about the publication. This is used to differentiate publication-specific files, often consisting of level 4 processed data and expanded subject metadata, in a publication dataset that also includes raw or minimally processed files from experimental datasets. This provides an easy way to distinguish and select for the publication-specific data from which the research findings were derived. 
When this attribute is used to annotate a Dataset it serves as a way to directly link the Dataset entity with the publication metadata stored in Synapse.","","","","False","","","","regex search ^syn[0-9]{8} error","string" -"associatedDataset","The synID of a Dataset entity. This serves to link other Synapse entities to Dataset entities. When used to annotate a publication Dataset this attribute should include the synID for an experimental Datasets from which the publication data was derived. Multiple synID can be specified using a comma-delimited list.","","","","False","","","","list like::regex search ^syn[0-9]{8} error","string" -"program","Name of the funding program that supported the generation of data and associated files","AMP AIM, AMP RA/SLE, Community Contribution","","","True","","","","","string" -"project","A sub-level attribute of `program` specifying a research initiative working to investigate particular hypotheses.","AIM for RA, ELLIPSS, LOCKIT, RA, SLE, STAMP, UMass V-CoRT","","","True","","","","list like error","string" -"programPhase","A label noting which AMP RA/SLE program phase generated the data.","I, II","","","True","","","","list like error","string" -"resourceType","High-level classification of the file content","code, experimental data, figure, metadata","","","True","","","","","string" -"dataType","High-level classification of the type of data contained in the file, loosely related to the experimental method or biological entity that is being profiled. Select all that apply using a comma-delimited list, though in most cases only a single label is expected. 
For multimodal datasets with concomitant profiling of biospecimen include 'multimodal'.","cytometry, epigenomics, genomics, histology, immune repertoire profiling, immunostaining, lipidomics, metabolomics, microbiome, multimodal, proteomics, transcriptomics","","","True","","","","list like error","string" -"dataSubtype","General classification to differentiate between omics profiling modalities. If N/A please select 'none'. Multiple selections can be provided in a comma-delimited list, however this is largely only expected in the context of Datasets and files that contain integrated experimental data spanning multiple types.","bulk, none, pseudobulk, single-cell, single-nucleus, spatial","","","True","","","","list like error","string" -"dataLevel","Level of data processing applied to file. Levels refer to pre-defined standards of processing for the given assay.","1, 2, 3, 4, 5","","","True","","","","","string" -"assay","The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.","ASAPSeq, CE-MS, CITESeq, CosMX, CyTOF, GenePS SeqFISH, H&E, LC-MS/MS, NULISA, Olink Explore HT, Olink Flex, Olink Focus, Olink Reveal, Olink Target 48, Olink Target 96, RNASeq, SNP array, SomaScan, VDJSeq, Visium, WES, WGS, Xenium, feature barcode sequencing, flow cytometry, imaging mass cytometry, imaging mass spectrometry, kiloplex, multiplexed ELISA, scRNASeq, scVDJSeq, serial IHC, snATACSeq, snRNASeq","","","True","","","","list like error","string" -"10xProbeSetReference","Name of probe set used in 10x Chromium Flex, Xenium, or Visium data. 
If custom modified probe set was used the probe reference should be included as metadata accompanying the experimental data files.","Flex Human Transcriptome Probe Set v1.0.1, Flex Human Transcriptome Probe Set v1.1.0, Visium Human Transcriptome v1, Visium Human Transcriptome v2, custom probe set","","","True","","","","","string" -"custom10xProbeSetSynID","If custom modified probe sets were used for collecting 10x Chromium Flex scRNA-seq data, then the probe reference files should be grouped as a zip or tar archive and uploaded as metadata. This attribute links the experimental data back to the probe set archive file via a synapse ID.","","","","True","","","","regex search ^syn[0-9]{8} error","string" -"platform","The specific version (manufacturer, model, etc.) of a technology that is used to carry out a laboratory or computational experiment. Specify where applicable for experimental data files, else enter 'none'. In most cases only a single label is expected, however multiple selections can be provided in comma-delimited list where applicable e.g., for 10x Genomics fastq files please specify both the 10x instrument and the sequencing platform.","BD FACSAria Fusion cell sorter, BD FACSAria III, BD FACSCanto, BD FACSCanto II, BD FACSDiscover A8, BD FACSDiscover S8, BD FACSLyric Clinical, BD FACSMelody, BD FACSymphony S6, BD LSRFortessa, Chromium Controller, Chromium GEM-X Single Cell 3' Chip v4, Chromium Next GEM Chip G, Chromium Next GEM Chip H, Chromium Next GEM Chip K, Chromium Next GEM Chip M, Chromium Next GEM Chip Q, Chromium X, Chromium Xo, Chromium iX, CyTOF XT, Cytek Aurora, Cytek Aurora Evo, Fluidigm BioMark, GEM-X Flex Gene Expression Chip, GEM-X OCM 5' Chip, Helios Mass Cytometer, Hyperion, Illumina HiSeq 2500, Illumina HiSeq X Ten, Illumina NextSeq 500, Illumina NovaSeq 6000, Illumina NovaSeq X, Not Applicable, Olink Signature Q100, Sony MA900, Thermo Fisher Attune CytPix, Thermo Fisher Attune NxT, Thermo Fisher Attune Xenith, Visium CytAssist, 
Xenium, none, unknown","","","True","","","","list like error","string" -"specimenModality","Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens","multispecimen, single specimen, unknown","","","True","","","","","string" -"biospecimenID","A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.","","","","True","","","","#BiospecimenMetadataTemplate unique error^^list like error","string" -"parentBiospecimenID","The biospecimenID associated with the originating biospecimen for derived or child biospecimens.","","","","False","","","","","string" -"biospecimenType","A label indicating the biological material collected for experimentation and data collection. Where applicable, provide all types in a comma-separated list.","PBMCs, cell line, fibroblast-like synoviocyte, kidney biopsy, none, plasma, primary cell culture, saliva, salivary gland, serum, skin biopsy, skin swab, stool, suction blister cells, suction blister fluid, synovial fluid, synovial tissue, total leukocytes, urine, uvea, whole blood","","","True","","","","#BiospecimenMetadataTemplate str^^#InVitroBiospecimenMetadataTemplate^^list like error","string" -"primaryCellSource","A label indicating the biological source material from which a primary cell culture was derived.","PBMCs, kidney, pannus-derived dermis, pannus-derived epidermis, salivary gland, synovial tissue, total leukocytes, urine, uvea, whole blood","","","True","","","","","string" -"metadataType","A label further classifying the content of metadata resource.","assay, biospecimen, cell coordinates, data dictionary, file manifest, medication, other, phenotype, protocol, single-cell metadata, target panel, template, tissue microarray map, tissue multiarray map, user manual","","","True","","","","","string" -"codingLanguage","The coding, aka programming, language(s) 
contained in the file marked with `resourceType = code`. Select all that apply.","C, C#, C++, Fortan, Java, Julia, Matlab, Python, R, Ruby, SAS","","","True","","","","","string" -"visitID","Ordinal ID distinguishing different patient visits.","","","","True","","","","","string" -"softwareAndVersion","Relevant software and version used to generate the data file.","BD FACSDiva 8.0.1, Cell Ranger 9.0.1, Cell Ranger ATAC v1.1.0, Cell Ranger v3.0.0, Cell Ranger v3.0.1, Cell Ranger v3.0.2, Cell Ranger v3.1.0, Cell Ranger v4.0.0, Cell Ranger v5.0.0, Cell Ranger v5.0.1, Cell Ranger v6.0.0, Cell Ranger v6.0.1, Cell Ranger v6.0.2, Cell Ranger v6.1.0, Cell Ranger v6.1.1, Cell Ranger v6.1.2, Cell Ranger v7.0.0, Cell Ranger v7.0.1, Cell Ranger v7.1.0, Cell Ranger v7.2.0, Cell Ranger v8.0.0, Cell Ranger v8.0.1, Cell Ranger v9.0.0, Space Ranger 3.0.0, Space Ranger 3.0.1, Space Ranger 3.1.0, Space Ranger 3.1.1, Space Ranger 3.1.2, Space Ranger 3.1.3, demuxlet","","","False","","","","","string" -"targetPanel","A unique or established human-readable name assigned to the panel of targets profiled in the experiment. For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.","","","","True","","","","","string" -"targetPanelSynID","In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. This attribute links experimental data files to the target panel metadata via the synapse ID of that file.","","","","True","","","","regex search ^syn[0-9]{8} error","string" -"targetPanelSize","The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. 
The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).","","","","False","","","","int error","integer" -"nucleicAcidSource","The source of the nucleic acid used as input for sequencing library fragments. Select all that apply, though in most cases only a single label is expected.","BCR mRNA, CRISPR protospacer feature barcode, TCR mRNA, Tn5-accessible gDNA, antigen capture barcode, gDNA, globin-depleted RNA, intracellular protein feature barcode, multiplexing oligo, poly(A) RNA, rRNA-depleted RNA, surface protein feature barcode","","","True","","","","list like error","string" -"biospecimenSubtype","Biospecimen status before sample is processed into a scRNA-seq library. Several scRNA-seq technologies support a variety of sample processing methods which can introduces sources of technical variation.","FFPE tissue, PFA-fixed tissue, cell or tissue lysate, cell suspension, flow-sorted cells, fresh tissue, frozen tissue, nuclei suspension, supernatant","","","False","","","","","string" -"cellRangerOutput","10x Genomics Cell Ranger software output several different counts results and formats, some with different processing applied. This label distinguishes between these types and is particularly helpful when multiple files are uploaded with the sample name, e.g., barcodes.tsv.gz","Not Applicable, filtered MEX, filtered_feature_bc_matrix, filtered_peak_bc_matrix, raw MEX, raw_feature_bc_matrix, raw_peak_bc_matrix","","","True","","","","","string" -"alignmentReference","The genomic/transcriptomic reference used for performing read alignment against.","10x Cell Ranger Human GRCh38 2020-A, 10x Cell Ranger Human GRCh38 2024-A, GRCh38, modified GRCh38, unknown, vdj_GRCh38_alts_ensembl-4.0.0","","","True","","","","","string" -"libraryPrepMethod","Sequencing library preparation method or kit used to create the library. 
If no commercially available kit was used, please select 'in-house library prep'.","10x Chromium Fixed RNA Human Transcriptome, 10x Chromium GEM-X Single Cell 3' v4, 10x Chromium GEM-X Single Cell 5' v3, 10x Chromium Next GEM Single Cell 3', 10x Chromium Next GEM Single Cell 3' 3.1, 10x Chromium Next GEM Single Cell 5' v1.1, 10x Chromium Next GEM Single Cell 5' v2, 10x Chromium Next GEM Single Cell ATAC v2, 10x Chromium Single Cell Human BCR, 10x Chromium Single Cell Human TCR, 10x GEM-X Flex Gene Expression Human, 10x GEM-X Universal 5' Gene Expression v3, CEL-Seq2, Chromium Next GEM Single Cell ATAC v1.1, Fluidigm C1 HT, NEBNext Human Immune Sequencing Kit, NEBNext Ultra II Directional RNA Library, Nextera XT, Nextera XT DNA, QIAseq miRNA Library, SMART-Seq Human BCR with UMI, SMART-Seq Human TCR with UMI, SMART-Seq v4 Ultra Low Input RNA, SMARTer Stranded Total RNA v2, Takara Human BCR profiling for Illumina, Takara Human TCR profiling for Illumina, Takara Human TCRv2 profiling for Illumina, Takara Human scTCR profiling for Illumina, TruSeq Stranded mRNA, custom DASH-treatment, in-house library prep","","","True","","","","","string" -"datasetType","High-level classification of dataset entity distinguishing between datasets compiled for a specific publication or as a general data resource.","experimental, publication","","","True","","","","","string" -"acknowledgmentStatement","A Dataset-specific attribute specifying the path to the wiki subpage within the ARK Portal - backend project that contains the acknowledgement statement that must be included in publications using data from the given dataset as a stipulation of the conditions of use.","syn26710600/wiki/619685","","","True","","","","","string" -"datasetDescription","A Dataset-specific attribute specifying the synID of the folder that contains a wiki write-up of the dataset description. 
This wiki content will be surfaced on the ARK Portal frontend site.","","","","True","","","","regex search ^syn[0-9]{8} error","string" -"ARKRelease","A Dataset-specific attribute specifying the ARK Portal release in which this dataset was first made available to the public.","1.0, 2.0, 2024.06.R1, 2024.07.R1, 2024.08.R1, 2024.09.R1, 2024.10.R1, 2024.12.R1, 2025.01.R1, 2025.02.R1, 2025.03.R1, 2025.04.R1, 2025.05.R1, 2025.06.R1, 2025.07.R1, 2025.08.R1, 2025.09.R1, 2025.10.R1, 2025.11.R1, 2025.12.R1","","","True","","","","","string" -"RObjectClass","Rds files store R objects, one per file. This label details the class of the R object saved to the Rds file or other similar file types.","ROCR prediction.object, Seurat object, SummarizedExperiment, Symphony reference, data.frame, list, matrix, sparse matrix, vector","","","False","","","","","string" -"diagnosis","A high-level classifier indicating the disease status of an individual.","At-Risk RA, Not Applicable, OA, RA, SLE, Sjogren's disease, control, cutaneous lupus erythematosus, dermatomyositis, discoid lupus erythematosus, lupus nephritis, psoriasis, psoriatic arthritis, scleroderma, unknown, vitiligo","","","True","","","","list like error","string" -"DOID","Disease ontology identifier associated with `diagnosis`. Attribute values are applied by ARK Portal data managers.","","","","True","","","","regex search ^DOID error","string" -"libraryID","A library label or name, unique within an experiment, used to distinguish sequencing libraries.","","","","True","","","","","string" -"associatedCodeURL","A URL to the repository where associated code is available.","","","","False","","","","","string" -"FMAID","Functional Model of Anatomy ontology ID corresponding to the anatomical site and origin of the biospecimen. 
This attribute is assigned by ARK Portal data managers.","","","","True","","","https://bioportal.bioontology.org/ontologies/FMA/?p=summary","int error","integer" -"BRENDA","BRENDA Tissue and Enzyme Source Ontology ID (BTO) corresponding to the `biospecimenType`. Attribute values are applied by ARK Portal data managers.","","","","True","","","https://bioportal.bioontology.org/ontologies/BTO","regex search ^BTO error","string" -"skinSiteStatus","Disease manifestation status of skin biospecimen.","healthy control, lesional, lesional proximal, non-lesional","","","True","","","","","string" -"salivaCollectionProcedure","Classification of saliva collection procedure. This is a conditionally dependent attribute triggered for saliva `biospecimenType`.","stimulated, unstimulated","","","True","","","","","string" -"synovialCollectionProcedure","Classification of procedure for synovial tissue collection.","arthroplasty, biopsy, synovectomy, unknown","","","True","","","","","string" -"publicationType","General classification of publication.","correction, peer-reviewed, pre-print","","","True","","","","","string" -"title","Title of the publication.","","","","True","","","","","string" -"journal","Journal in which the publication was released","","","","True","","","","","string" -"year","Year (YYYY) in which the paper was published.","","","","True","","","","regex search [1-2][0-9]{3} error","string" -"publicationDate","The publication date extracted from PubMed database","","","","True","","","","","string" -"PMID","PubMed(R) Identifier","","","","True","","","","regex search ^PMID error","string" -"DOI","Digital object identifier","","","","True","","","","","string" -"anatomicalSite","The anatomical site, i.e., location on or within the body, from which the biospecimen was collected. 
This attribute is required depending on the value selected for biospecimenType.","left 2nd MCP joint, left ankle joint, left hip joint, left knee joint, left wrist joint, other site, right 1st MTP joint, right 2nd MCP joint, right 2nd MTP joint, right 3rd MCP joint, right ankle joint, right hip joint, right knee joint, right wrist joint, unknown","","","True","","","","","string" -"vitiligoPattern","A high-level classification of vitiligo based on the distribution and patterning of lesions across the body.","mixed, non-segmental, segmental, unclassified","","","False","","","","","string" -"vitiligoPhenotype","Classification of vitiligo lesions which correlate with autoimmune activity and result in specific skin and depigmentation manifestations at the lesion site. If N/A please select 'none'. Multiple selections can be provided in a comma-delimited list.","active, confetti, inflammatory, none, trichrome","","","True","","","","list like error","string" -"psoriasisType","General type classification of psoriasis disease manifestation.","erythrodermic, guttate, inverse, plaque, pustular","","","False","","","","","string" -"ageDiagnosis","Age at which subject was diagnosed with `diagnosis`. If providing this value be sure the unit matches that used for `age`.","","","","False","","","","num error","number" -"age","Age at which subject was enrolled in study or age at corresponding visit and data collection event. 
If value unknown, enter '-1'.","","","","True","","","","num error","number" -"ageUnits","The unit of measure used for `ageEnrollment` and `ageDiagnosis`","months, years","","","True","","","","","string" -"sex","A textual description of a person's sex at birth.","female, intersex, male, unknown","","","True","","","","","string" -"height","Standing height of subject.","","","","True","","","","num error","number" -"race","A textual description of a person's race.","American Indian or Alaska Native, Asian, Black or African American, Hispanic, Mixed Race, Native Hawaiian or Other Pacific Islander, White, other, unknown","","","False","","","","","string" -"ethnicity","The ethnicity of a person.","Hispanic or Latino, Not Hispanic or Latino, unknown","","","False","","","","","string" -"heightUnits","Unit of measure of value provided for `height`.","centimeters, feet, inches, meters","","","True","","","","","string" -"weight","Weight of subject. If value unknown, enter '-1'.","","","","True","","","","num error","number" -"weightUnits","Abbreviated unit of measure of value provided for `weight`.","g, kg, lb, oz","","","True","","","","","string" -"comorbidities","Any diseases or medical condition that is simultaneously present in addition to `diagnosis`.","Hashimoto's Thyroiditis, autoimmune thyroid disease, cardiovascular disease, diabetes, inflammatory bowel disease, multiple sclerosis, other, psoriasis, psoriatic arthritis, pulmonary disease, rheumatoid arthritis, systemic lupus erythematosus","","","False","","","","list like error","string" -"diabetesType","Type of diabetes mellitus.","gestational, type 1, type 2, unknown","","","False","","","","","string" -"VIDA","Vitiligo Disease Activity Score, a six-point scale that evaluates the activity of vitiligo","","","","False","","","","","string" -"VASI","Total body Vitiligo Area Severity Index, a measure of the percentage of vitiligo involvement across the body calculated in terms of hand 
units.","","","","False","","","","","string" -"VETI","Vitiligo Extent Tensity Index measures the extent of vitiligo by a numerical score and combines analysis of extensity and severity of vitiligo to produce a constant and reproducible number.","","","","False","","","https://doi.org/10.5826/dpc.0404a18","num error","number" -"PASI","Psoriasis Area and Severity Index, is a measurement of the discoloration, thickness, scaling, and coverage of psoriasis plaques.","","","","False","","","","","string" -"CDASI","Cutaneous Dermatomyositis Disease Area and Severity Index, is a measurement used to characterize cutaneous dermatomyositis severity.","","","","False","","","","","string" -"percentCellViability","A measure of the proportion of viable cells within a cell suspension. Scale is 0-100.","","","","True","","","","inRange 50 100 error","integer" -"inputCellCount","An estimate of the number of cells expected to be sequenced in a library. Software that process single-cell sequencing data often include options for users to specify this value to improve processing results.","","","","True","","","","int error","integer" -"totalReads","Total number of reads sequenced from the library.","","","","True","","","","int error","integer" -"sequencingSaturation","A measure of the fraction of library complexity that was sequenced in a library. This metric quantifies the fraction of reads originating from an already-observed UMI. More specifically, this is the fraction of confidently mapped, valid cell-barcode, valid UMI reads that are non-unique. 
Scale is 0-1.","","","","False","","","https://kb.10xgenomics.com/hc/en-us/articles/115005062366-What-is-sequencing-saturation","inRange 0 1 error","number" -"ImmPortAccession","Accession to corresponding information in ImmPort.","","","","False","","","","regex search ^SDY error","string" -"FACSPopulation","A description of the marker gating strategy used to derive the population cells with FACS.","","","","True","","","","","string" -"cellOntologyID","Cell Ontology CL identifier that best describes a biopsecimen used to generate data, e.g., CL:0000084 for T cell.","","","","False","","","","regex search ^CL: error","string" -"cellType","The cell type name from Cell Ontology for the corresponding CL identifier.","","","","False","","","","","string" -"userDefinedCellType","User-defined label of a cell type. Contributors are provided this option to use preferred or custom cell type labels that may vary from options and standards set by Cell Ontology.","","","","False","","","","","string" -"krennInflammatory","A standardized, semi-quantitative measure of the degree of inflammatory infiltrate in synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"krennLining","A standardized, semi-quantitative measure of the degree of hyperplasia/enlargement of the synovial lining layer of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. 
If value unknown, use '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"krennStroma","A standardized, semi-quantitative measure of stromal cell density of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"krennSynovitisScore","The Krenn Synovitis Score (KSS) is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"datasetStatus","A categorical label indicating the status of an ARK Portal dataset. 
This is applied to improve downstream management of datasets as well as various ETL workflows.","deprecated, released, test, under peer review, unreleased","","","True","","","","","string" -"sampleProcessingBatch","A label indicating batching of sample processing or preparation that occurs prior to data collection.","","","","False","","","","","string" -"dataCollectionBatch","A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.","","","","False","","","","","string" -"metadataStandards","Metadata standards used to generate the metadata","ARK data model, user-defined","","","False","","","","","string" -"skinSunExposure","For skin-based biospecimen, this attribute indicates whether the sample was collected from an anatomical site that does or does not receive routine sun exposure.","not sun exposed, sun exposed","","","False","","","","","string" -"sequencingSpikeIn","Pre-made sequencing libraries may be added to your sequencing run to improve sequencing results. If such a 'spike-in' library was used, this attribute specifies the name of the library and the percentage of the spike-in library used in the sequencing run.","","","","False","","","","","string" -"librarySpikeIn","Pre-made collections of nucleic acid fragments can be added to samples during the library construction process to improve downstream quantification and statistical analyses. If such a 'spike-in' was used, this attribute specifies the name and manufacturer of the spike-in.","","","","False","","","","","string" -"readLength","The number of base pairs (bp) sequenced for reads in a fastq file.","","","","False","","","","int error","integer" -"plateID","An identifier assigned to a multi-well plate. Certain data types and assays profile samples using multi-well plates. 
Knowing which samples were profiled on each plate is important for establishing sample provenance, finding the right data files for a specific set of samples, as well as downstream exploratory data analysis and QC work particularly regarding identification and correction of batch effects.","","","","False","","","","","string" -"PMCID","Pubmed Central Identifier, formatted as a compact URI string that will automatically resolve into a URL based on the Synapse bioregistry; e.g., pmc:PMCxxxxxxxx","","","","False","","","","regex search ^pmc:PMC[0-9]{8} error","string" -"processedDataType","A label used for file annotations to provide a brief description of the processed data file.","barcode counts, differential expression results, epigenomic peaks, gene counts","","","False","","","","list like error","string" -"eventCount","The total number of events detected and captured in the corresponding FCS file. In cytometry, an event corresponds to particles detected and measured by the instrument. This number corresponds to the number of rows in the FCS file and is expected to be a whole integer number.","","","","False","","","","int error","integer" -"notes","Please use this attribute to captured pertinent details not otherwise captured. This is an unstructured text entry, please be concise.","","","","False","","","","","string" -"species","The genus species of sample or subject origin.","Homo sapiens","","","True","","","","","string" -"treatment","A short descriptive label indicating what experimental treatments have been applied to a biospecimen before data capture. Please include the word 'control' to indicate specific treatments that are intended to serve as a control group.","","","","True","","","","","string" -"treatmentTimepoint","Where applicable, specify the timepoint relative to treatment to distinguish different sets of samples that have undergone the same treatment but for different lengths of time. 
REQUIRED: please specify the unit of time, e.g., secs, mins, hrs, days, etc.","","","","False","","","","","string" -"slideID","A distinct label or name, unique within an experiment, assigned to an imaging slides.","","","","True","","","","","string" -"altSampleID","An alternate identifier for a sample. With some assays there can be default or alternate sample identifiers entered into the data collection software. If you will be uploading data collected with an alternate identifier please provide that here. In some cases, the ARK BDM team will use this field to capture original but out-dated identifiers for samples.","","","","False","","","","","string" -"sampleCollectionBatch","A label indicating batching of sample collection or experiment execution that occurs prior to data collection.","","","","False","","","","","string" -"associatedAccession","This is a File and Dataset annotation attribute indicating additional accessions (i.e., unique identifiers) associated with the data when the data has also been submitted to or can be found in other repositories such as GEO, SRA, dbGaP, etc.","","","","False","","","","list like error","string" +"Attribute","Description","Valid Values","DependsOn","Required","Parent","Source","Validation Rules","columnType","Format","Minimum","Maximum","Pattern" +"Biospecimen Metadata Template","A general template outlining metadata to be collected for biospecimen profiled in a dataset.","","Component, program, project, individualID, biospecimenID, parentBiospecimenID, altSampleID, biospecimenType, biospecimenSubtype, notes, sampleCollectionBatch","","","","","","","","","" +"In Vitro Biospecimen Metadata Template","A template outlining metadata to be collected for biospecimen used for an in vitro experiment.","","Component, program, project, individualID, biospecimenID, parentBiospecimenID, altSampleID, biospecimenType, biospecimenSubtype, treatment, treatmentTimepoint, notes, sampleCollectionBatch","","","","","","","","","" +"skin 
swab","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","skinSiteStatus, anatomicalSite","","","","","","","","","" +"skin biopsy","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","skinSiteStatus, anatomicalSite","","","","","","","","","" +"suction blister cells","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","skinSiteStatus, anatomicalSite","","","","","","","","","" +"suction blister fluid","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","skinSiteStatus, anatomicalSite","","","","","","","","","" +"saliva","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","salivaCollectionProcedure","","","","","","","","","" +"synovial tissue","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","synovialCollectionProcedure, anatomicalSite, krennInflammatory, krennLining, krennStroma, krennSynovitisScore","","","","","","","","","" +"synovial fluid","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","synovialCollectionProcedure, anatomicalSite","","","","","","","","","" +"flow-sorted cells","A valid value of 'biospecimenSubtype' that triggers conditional dependencies for additional attributes.","","FACSPopulation, cellOntologyID, cellType, userDefinedCellType","","","","","","","","","" +"cell suspension","A valid value of 'biospecimenSubtype' that triggers conditional dependencies for additional attributes.","","cellOntologyID, cellType, userDefinedCellType","","","","","","","","","" +"AMP RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. 
Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","","","" +"AMP AIM","Accelerating Medicines Partnership Autoimmune and Immune-Mediated Diseases. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","","","" +"primary cell culture","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","cellType, cellOntologyID, primaryCellSource","","","","","","","","","" +"cell line","Is a valid value of 'biospecimenType' that triggers conditional dependencies for additional attributes.","","cellType, cellOntologyID","","","","","","","","","" +"individualID","Unique identifier assigned to each study participant. For multi-specimen data files provide all IDs in a comma-separated list.","","","TRUE","","","","string","","","","" +"biospecimenID","A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.","","","TRUE","","","","string","","","","" +"biospecimenType","A label indicating the biological material collected for experimentation and data collection. 
Where applicable, provide all types in a comma-separated list.","PBMCs, cell line, fibroblast-like synoviocyte, kidney biopsy, none, plasma, primary cell culture, saliva, salivary gland, serum, skin biopsy, skin swab, stool, suction blister cells, suction blister fluid, synovial fluid, synovial tissue, total leukocytes, urine, uvea, whole blood","","TRUE","","","","string","","","","" +"project","A sub-level attribute of `program` specifying a research initiative working to investigate particular hypotheses.","AIM for RA, ELLIPSS, LOCKIT, RA, SLE, STAMP, UMass V-CoRT","","TRUE","","","","string","","","","" +"Component","A high-level attribute for grouping attributes into templates.","","","True","","","","string","","","","" +"fileFormat","Standard file format name or file extension","bai, bam, bed, bim, csv, czi, docx, dose, erate, fam, fastq, fcs, geojson, h5, h5ad, info, mcd, mtx, parquet, pdf, py, rds, rec, svs, tbi, tgz, tsv, txt, vcf, xls, xlsx, zip","","True","","","","string","","","","" +"program","Name of the funding program that supported the generation of data and associated files","AMP AIM, AMP RA/SLE, Community Contribution","","True","","","","string","","","","" +"programPhase","A label noting which AMP RA/SLE program phase generated the data.","I, II","","True","","","list like error","string_list","","","","" +"resourceType","High-level classification of the file content","code, experimental data, figure, metadata","","True","","","","string","","","","" +"dataType","High-level classification of the type of data contained in the file, loosely related to the experimental method or biological entity that is being profiled. Select all that apply using a comma-delimited list, though in most cases only a single label is expected. 
For multimodal datasets with concomitant profiling of biospecimen include 'multimodal'.","cytometry, epigenomics, genomics, histology, immune repertoire profiling, immunostaining, lipidomics, metabolomics, microbiome, multimodal, proteomics, transcriptomics","","True","","","list like error","string_list","","","","" +"dataSubtype","General classification to differentiate between omics profiling modalities. If N/A please select 'none'. Multiple selections can be provided in a comma-delimited list, however this is largely only expected in the context of Datasets and files that contain integrated experimental data spanning multiple types.","bulk, none, pseudobulk, single-cell, single-nucleus, spatial","","True","","","list like error","string_list","","","","" +"dataLevel","Level of data processing applied to file. Levels refer to pre-defined standards of processing for the given assay.","1, 2, 3, 4, 5","","True","","","","string","","","","" +"assay","The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.","ASAPSeq, CE-MS, CITESeq, CosMX, CyTOF, GenePS SeqFISH, H&E, LC-MS/MS, NULISA, Olink Explore HT, Olink Flex, Olink Focus, Olink Reveal, Olink Target 48, Olink Target 96, RNASeq, SNP array, SomaScan, VDJSeq, Visium, WES, WGS, Xenium, feature barcode sequencing, flow cytometry, imaging mass cytometry, imaging mass spectrometry, kiloplex, multiplexed ELISA, scRNASeq, scVDJSeq, serial IHC, snATACSeq, snRNASeq","","True","","","list like error","string_list","","","","" +"10xProbeSetReference","Name of probe set used in 10x Chromium Flex, Xenium, or Visium data. 
If custom modified probe set was used the probe reference should be included as metadata accompanying the experimental data files.","Flex Human Transcriptome Probe Set v1.0.1, Flex Human Transcriptome Probe Set v1.1.0, Visium Human Transcriptome v1, Visium Human Transcriptome v2, custom probe set","","True","","","","string","","","","" +"platform","The specific version (manufacturer, model, etc.) of a technology that is used to carry out a laboratory or computational experiment. Specify where applicable for experimental data files, else enter 'none'. In most cases only a single label is expected, however multiple selections can be provided in comma-delimited list where applicable e.g., for 10x Genomics fastq files please specify both the 10x instrument and the sequencing platform.","BD FACSAria Fusion cell sorter, BD FACSAria III, BD FACSCanto, BD FACSCanto II, BD FACSDiscover A8, BD FACSDiscover S8, BD FACSLyric Clinical, BD FACSMelody, BD FACSymphony S6, BD LSRFortessa, Chromium Controller, Chromium GEM-X Single Cell 3' Chip v4, Chromium Next GEM Chip G, Chromium Next GEM Chip H, Chromium Next GEM Chip K, Chromium Next GEM Chip M, Chromium Next GEM Chip Q, Chromium X, Chromium Xo, Chromium iX, CyTOF XT, Cytek Aurora, Cytek Aurora Evo, Fluidigm BioMark, GEM-X Flex Gene Expression Chip, GEM-X OCM 5' Chip, Helios Mass Cytometer, Hyperion, Illumina HiSeq 2500, Illumina HiSeq X Ten, Illumina NextSeq 500, Illumina NovaSeq 6000, Illumina NovaSeq X, Not Applicable, Olink Signature Q100, Sony MA900, Thermo Fisher Attune CytPix, Thermo Fisher Attune NxT, Thermo Fisher Attune Xenith, Visium CytAssist, Xenium, none, unknown","","True","","","list like error","string_list","","","","" +"specimenModality","Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens","multispecimen, single specimen, unknown","","True","","","","string","","","","" +"parentBiospecimenID","The biospecimenID associated with the 
originating biospecimen for derived or child biospecimens.","","","False","","","","string","","","","" +"primaryCellSource","A label indicating the biological source material from which a primary cell culture was derived.","PBMCs, kidney, pannus-derived dermis, pannus-derived epidermis, salivary gland, synovial tissue, total leukocytes, urine, uvea, whole blood","","True","","","","string","","","","" +"metadataType","A label further classifying the content of metadata resource.","assay, biospecimen, cell coordinates, data dictionary, file manifest, medication, other, phenotype, protocol, single-cell metadata, target panel, template, tissue microarray map, tissue multiarray map, user manual","","True","","","","string","","","","" +"codingLanguage","The coding, aka programming, language(s) contained in the file marked with `resourceType = code`. Select all that apply.","C, C#, C++, Fortan, Java, Julia, Matlab, Python, R, Ruby, SAS","","True","","","","string","","","","" +"visitID","Ordinal ID distinguishing different patient visits.","","","True","","","","string","","","","" +"softwareAndVersion","Relevant software and version used to generate the data file.","BD FACSDiva 8.0.1, Cell Ranger 9.0.1, Cell Ranger ATAC v1.1.0, Cell Ranger v3.0.0, Cell Ranger v3.0.1, Cell Ranger v3.0.2, Cell Ranger v3.1.0, Cell Ranger v4.0.0, Cell Ranger v5.0.0, Cell Ranger v5.0.1, Cell Ranger v6.0.0, Cell Ranger v6.0.1, Cell Ranger v6.0.2, Cell Ranger v6.1.0, Cell Ranger v6.1.1, Cell Ranger v6.1.2, Cell Ranger v7.0.0, Cell Ranger v7.0.1, Cell Ranger v7.1.0, Cell Ranger v7.2.0, Cell Ranger v8.0.0, Cell Ranger v8.0.1, Cell Ranger v9.0.0, Space Ranger 3.0.0, Space Ranger 3.0.1, Space Ranger 3.1.0, Space Ranger 3.1.1, Space Ranger 3.1.2, Space Ranger 3.1.3, demuxlet","","False","","","","string","","","","" +"targetPanel","A unique or established human-readable name assigned to the panel of targets profiled in the experiment. 
For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.","","","True","","","","string","","","","" +"targetPanelSize","The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).","","","False","","","int error","integer","","","","" +"nucleicAcidSource","The source of the nucleic acid used as input for sequencing library fragments. Select all that apply, though in most cases only a single label is expected.","BCR mRNA, CRISPR protospacer feature barcode, TCR mRNA, Tn5-accessible gDNA, antigen capture barcode, gDNA, globin-depleted RNA, intracellular protein feature barcode, multiplexing oligo, poly(A) RNA, rRNA-depleted RNA, surface protein feature barcode","","True","","","list like error","string_list","","","","" +"biospecimenSubtype","Biospecimen status before sample is processed into a scRNA-seq library. Several scRNA-seq technologies support a variety of sample processing methods which can introduce sources of technical variation.","FFPE tissue, PFA-fixed tissue, cell or tissue lysate, cell suspension, flow-sorted cells, fresh tissue, frozen tissue, nuclei suspension, supernatant","","False","","","","string","","","","" +"cellRangerOutput","10x Genomics Cell Ranger software outputs several different count results and formats, some with different processing applied. 
This label distinguishes between these types and is particularly helpful when multiple files are uploaded with the sample name, e.g., barcodes.tsv.gz","Not Applicable, filtered MEX, filtered_feature_bc_matrix, filtered_peak_bc_matrix, raw MEX, raw_feature_bc_matrix, raw_peak_bc_matrix","","True","","","","string","","","","" +"alignmentReference","The genomic/transcriptomic reference used for performing read alignment against.","10x Cell Ranger Human GRCh38 2020-A, 10x Cell Ranger Human GRCh38 2024-A, GRCh38, modified GRCh38, unknown, vdj_GRCh38_alts_ensembl-4.0.0","","True","","","","string","","","","" +"libraryPrepMethod","Sequencing library preparation method or kit used to create the library. If no commercially available kit was used, please select 'in-house library prep'.","10x Chromium Fixed RNA Human Transcriptome, 10x Chromium GEM-X Single Cell 3' v4, 10x Chromium GEM-X Single Cell 5' v3, 10x Chromium Next GEM Single Cell 3', 10x Chromium Next GEM Single Cell 3' 3.1, 10x Chromium Next GEM Single Cell 5' v1.1, 10x Chromium Next GEM Single Cell 5' v2, 10x Chromium Next GEM Single Cell ATAC v2, 10x Chromium Single Cell Human BCR, 10x Chromium Single Cell Human TCR, 10x GEM-X Flex Gene Expression Human, 10x GEM-X Universal 5' Gene Expression v3, CEL-Seq2, Chromium Next GEM Single Cell ATAC v1.1, Fluidigm C1 HT, NEBNext Human Immune Sequencing Kit, NEBNext Ultra II Directional RNA Library, Nextera XT, Nextera XT DNA, QIAseq miRNA Library, SMART-Seq Human BCR with UMI, SMART-Seq Human TCR with UMI, SMART-Seq v4 Ultra Low Input RNA, SMARTer Stranded Total RNA v2, Takara Human BCR profiling for Illumina, Takara Human TCR profiling for Illumina, Takara Human TCRv2 profiling for Illumina, Takara Human scTCR profiling for Illumina, TruSeq Stranded mRNA, custom DASH-treatment, in-house library prep","","True","","","","string","","","","" +"datasetType","High-level classification of dataset entity distinguishing between datasets compiled for a specific publication or 
as a general data resource.","experimental, publication","","True","","","","string","","","","" +"acknowledgmentStatement","A Dataset-specific attribute specifying the path to the wiki subpage within the ARK Portal - backend project that contains the acknowledgement statement that must be included in publications using data from the given dataset as a stipulation of the conditions of use.","syn26710600/wiki/619685","","True","","","","string","","","","" +"ARKRelease","A Dataset-specific attribute specifying the ARK Portal release in which this dataset was first made available to the public.","1.0, 2.0, 2024.06.R1, 2024.07.R1, 2024.08.R1, 2024.09.R1, 2024.10.R1, 2024.12.R1, 2025.01.R1, 2025.02.R1, 2025.03.R1, 2025.04.R1, 2025.05.R1, 2025.06.R1, 2025.07.R1, 2025.08.R1, 2025.09.R1, 2025.10.R1, 2025.11.R1, 2025.12.R1","","True","","","","string","","","","" +"RObjectClass","Rds files store R objects, one per file. This label details the class of the R object saved to the Rds file or other similar file types.","ROCR prediction.object, Seurat object, SummarizedExperiment, Symphony reference, data.frame, list, matrix, sparse matrix, vector","","False","","","","string","","","","" +"diagnosis","A high-level classifier indicating the disease status of an individual.","At-Risk RA, Not Applicable, OA, RA, SLE, Sjogren's disease, control, cutaneous lupus erythematosus, dermatomyositis, discoid lupus erythematosus, lupus nephritis, psoriasis, psoriatic arthritis, scleroderma, unknown, vitiligo","","True","","","list like error","string_list","","","","" +"libraryID","A library label or name, unique within an experiment, used to distinguish sequencing libraries.","","","True","","","","string","","","","" +"associatedCodeURL","A URL to the repository where associated code is available.","","","False","","","","string","uri","","","" +"FMAID","Foundational Model of Anatomy ontology ID corresponding to the anatomical site and origin of the biospecimen. 
This attribute is assigned by ARK Portal data managers.","","","True","","https://bioportal.bioontology.org/ontologies/FMA/?p=summary","int error","integer","","","","" +"skinSiteStatus","Disease manifestation status of skin biospecimen.","healthy control, lesional, lesional proximal, non-lesional","","True","","","","string","","","","" +"salivaCollectionProcedure","Classification of saliva collection procedure. This is a conditionally dependent attribute triggered for saliva `biospecimenType`.","stimulated, unstimulated","","True","","","","string","","","","" +"synovialCollectionProcedure","Classification of procedure for synovial tissue collection.","arthroplasty, biopsy, synovectomy, unknown","","True","","","","string","","","","" +"publicationType","General classification of publication.","correction, peer-reviewed, pre-print","","True","","","","string","","","","" +"title","Title of the publication.","","","True","","","","string","","","","" +"journal","Journal in which the publication was released","","","True","","","","string","","","","" +"publicationDate","The publication date extracted from PubMed database","","","True","","","","string","","","","" +"DOI","Digital object identifier","","","True","","","","string","uri-reference","","","" +"anatomicalSite","The anatomical site, i.e., location on or within the body, from which the biospecimen was collected. 
This attribute is required depending on the value selected for biospecimenType.","left 2nd MCP joint, left ankle joint, left hip joint, left knee joint, left wrist joint, other site, right 1st MTP joint, right 2nd MCP joint, right 2nd MTP joint, right 3rd MCP joint, right ankle joint, right hip joint, right knee joint, right wrist joint, unknown","","True","","","","string","","","","" +"vitiligoPattern","A high-level classification of vitiligo based on the distribution and patterning of lesions across the body.","mixed, non-segmental, segmental, unclassified","","False","","","","string","","","","" +"vitiligoPhenotype","Classification of vitiligo lesions which correlate with autoimmune activity and result in specific skin and depigmentation manifestations at the lesion site. If N/A please select 'none'. Multiple selections can be provided in a comma-delimited list.","active, confetti, inflammatory, none, trichrome","","True","","","list like error","string_list","","","","" +"psoriasisType","General type classification of psoriasis disease manifestation.","erythrodermic, guttate, inverse, plaque, pustular","","False","","","","string","","","","" +"ageDiagnosis","Age at which subject was diagnosed with `diagnosis`. If providing this value be sure the unit matches that used for `age`.","","","False","","","num error","number","","","","" +"age","Age at which subject was enrolled in study or age at corresponding visit and data collection event. 
If value unknown, enter '-1'.","","","True","","","num error","number","","","","" +"ageUnits","The unit of measure used for `age` and `ageDiagnosis`","months, years","","True","","","","string","","","","" +"sex","A textual description of a person's sex at birth.","female, intersex, male, unknown","","True","","","","string","","","","" +"height","Standing height of subject.","","","True","","","num error","number","","","","" +"race","A textual description of a person's race.","American Indian or Alaska Native, Asian, Black or African American, Hispanic, Mixed Race, Native Hawaiian or Other Pacific Islander, White, other, unknown","","False","","","","string","","","","" +"ethnicity","The ethnicity of a person.","Hispanic or Latino, Not Hispanic or Latino, unknown","","False","","","","string","","","","" +"heightUnits","Unit of measure of value provided for `height`.","centimeters, feet, inches, meters","","True","","","","string","","","","" +"weight","Weight of subject. If value unknown, enter '-1'.","","","True","","","num error","number","","","","" +"weightUnits","Abbreviated unit of measure of value provided for `weight`.","g, kg, lb, oz","","True","","","","string","","","","" +"comorbidities","Any disease or medical condition that is simultaneously present in addition to `diagnosis`.","Hashimoto's Thyroiditis, autoimmune thyroid disease, cardiovascular disease, diabetes, inflammatory bowel disease, multiple sclerosis, other, psoriasis, psoriatic arthritis, pulmonary disease, rheumatoid arthritis, systemic lupus erythematosus","","False","","","list like error","string_list","","","","" +"diabetesType","Type of diabetes mellitus.","gestational, type 1, type 2, unknown","","False","","","","string","","","","" +"VIDA","Vitiligo Disease Activity Score, a six-point scale that evaluates the activity of vitiligo","","","False","","","","string","","","","" +"VASI","Total body Vitiligo Area Severity Index, a measure of the percentage of vitiligo
involvement across the body calculated in terms of hand units.","","","False","","","","string","","","","" +"VETI","Vitiligo Extent Tensity Index measures the extent of vitiligo by a numerical score and combines analysis of extensity and severity of vitiligo to produce a constant and reproducible number.","","","False","","https://doi.org/10.5826/dpc.0404a18","num error","number","","","","" +"PASI","Psoriasis Area and Severity Index, is a measurement of the discoloration, thickness, scaling, and coverage of psoriasis plaques.","","","False","","","","string","","","","" +"CDASI","Cutaneous Dermatomyositis Disease Area and Severity Index, is a measurement used to characterize cutaneous dermatomyositis severity.","","","False","","","","string","","","","" +"inputCellCount","An estimate of the number of cells expected to be sequenced in a library. Software that processes single-cell sequencing data often includes options for users to specify this value to improve processing results.","","","True","","","int error","integer","","","","" +"totalReads","Total number of reads sequenced from the library.","","","True","","","int error","integer","","","","" +"FACSPopulation","A description of the marker gating strategy used to derive the population cells with FACS.","","","True","","","","string","","","","" +"cellType","The cell type name from Cell Ontology for the corresponding CL identifier.","","","False","","","","string","","","","" +"userDefinedCellType","User-defined label of a cell type. Contributors are provided this option to use preferred or custom cell type labels that may vary from options and standards set by Cell Ontology.","","","False","","","","string","","","","" +"krennInflammatory","A standardized, semi-quantitative measure of the degree of inflammatory infiltrate in synovial specimen.
This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"krennLining","A standardized, semi-quantitative measure of the degree of hyperplasia/enlargement of the synovial lining layer of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"krennStroma","A standardized, semi-quantitative measure of stromal cell density of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"krennSynovitisScore","The Krenn Synovitis Score (KSS) is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"datasetStatus","A categorical label indicating the status of an ARK Portal dataset. 
This is applied to improve downstream management of datasets as well as various ETL workflows.","deprecated, released, test, under peer review, unreleased","","True","","","","string","","","","" +"sampleProcessingBatch","A label indicating batching of sample processing or preparation that occurs prior to data collection.","","","False","","","","string","","","","" +"dataCollectionBatch","A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.","","","False","","","","string","","","","" +"metadataStandards","Metadata standards used to generate the metadata","ARK data model, user-defined","","False","","","","string","","","","" +"skinSunExposure","For skin-based biospecimen, this attribute indicates whether the sample was collected from an anatomical site that does or does not receive routine sun exposure.","not sun exposed, sun exposed","","False","","","","string","","","","" +"sequencingSpikeIn","Pre-made sequencing libraries may be added to your sequencing run to improve sequencing results. If such a 'spike-in' library was used, this attribute specifies the name of the library and the percentage of the spike-in library used in the sequencing run.","","","False","","","","string","","","","" +"librarySpikeIn","Pre-made collections of nucleic acid fragments can be added to samples during the library construction process to improve downstream quantification and statistical analyses. If such a 'spike-in' was used, this attribute specifies the name and manufacturer of the spike-in.","","","False","","","","string","","","","" +"readLength","The number of base pairs (bp) sequenced for reads in a fastq file.","","","False","","","int error","integer","","","","" +"plateID","An identifier assigned to a multi-well plate. Certain data types and assays profile samples using multi-well plates. 
Knowing which samples were profiled on each plate is important for establishing sample provenance, finding the right data files for a specific set of samples, as well as downstream exploratory data analysis and QC work particularly regarding identification and correction of batch effects.","","","False","","","","string","","","","" +"processedDataType","A label used for file annotations to provide a brief description of the processed data file.","barcode counts, differential expression results, epigenomic peaks, gene counts","","False","","","list like error","string_list","","","","" +"eventCount","The total number of events detected and captured in the corresponding FCS file. In cytometry, an event corresponds to particles detected and measured by the instrument. This number corresponds to the number of rows in the FCS file and is expected to be a whole integer number.","","","False","","","int error","integer","","","","" +"notes","Please use this attribute to capture pertinent details not otherwise captured. This is an unstructured text entry, please be concise.","","","False","","","","string","","","","" +"species","The genus species of sample or subject origin.","Homo sapiens","","True","","","","string","","","","" +"treatment","A short descriptive label indicating what experimental treatments have been applied to a biospecimen before data capture. Please include the word 'control' to indicate specific treatments that are intended to serve as a control group.","","","True","","","","string","","","","" +"treatmentTimepoint","Where applicable, specify the timepoint relative to treatment to distinguish different sets of samples that have undergone the same treatment but for different lengths of time.
REQUIRED: please specify the unit of time, e.g., secs, mins, hrs, days, etc.","","","False","","","","string","","","","" +"slideID","A distinct label or name, unique within an experiment, assigned to an imaging slide.","","","True","","","","string","","","","" +"altSampleID","An alternate identifier for a sample. With some assays there can be default or alternate sample identifiers entered into the data collection software. If you will be uploading data collected with an alternate identifier please provide that here. In some cases, the ARK BDM team will use this field to capture original but out-dated identifiers for samples.","","","False","","","","string","","","","" +"sampleCollectionBatch","A label indicating batching of sample collection or experiment execution that occurs prior to data collection.","","","False","","","","string","","","","" +"associatedAccession","This is a File and Dataset annotation attribute indicating additional accessions (i.e., unique identifiers) associated with the data when the data has also been submitted to or can be found in other repositories such as GEO, SRA, dbGaP, etc.","","","False","","","list like error","string_list","","","","" +"percentCellViability","A measure of the proportion of viable cells within a cell suspension. Scale is 0-100.","","","True","","","inRange 50 100 error","integer","","50","100","" +"sequencingSaturation","A measure of the fraction of library complexity that was sequenced in a library. This metric quantifies the fraction of reads originating from an already-observed UMI. More specifically, this is the fraction of confidently mapped, valid cell-barcode, valid UMI reads that are non-unique. Scale is 0-1.","","","False","","https://kb.10xgenomics.com/hc/en-us/articles/115005062366-What-is-sequencing-saturation","inRange 0 1 error","number","","0","1","" +"publicationSynID","The synID of the corresponding Synapse entity that stores metadata about the publication.
This is used to differentiate publication-specific files, often consisting of level 4 processed data and expanded subject metadata, in a publication dataset that also includes raw or minimally processed files from experimental datasets. This provides an easy way to distinguish and select for the publication-specific data from which the research findings were derived. When this attribute is used to annotate a Dataset it serves as a way to directly link the Dataset entity with the publication metadata stored in Synapse.","","","False","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"associatedDataset","The synID of a Dataset entity. This serves to link other Synapse entities to Dataset entities. When used to annotate a publication Dataset this attribute should include the synID of each experimental Dataset from which the publication data was derived. Multiple synIDs can be specified using a comma-delimited list.","","","False","","","list like::regex search ^syn[0-9]{8} error","string_list","","","","^syn[0-9]{8}" +"custom10xProbeSetSynID","If custom modified probe sets were used for collecting 10x Chromium Flex scRNA-seq data, then the probe reference files should be grouped as a zip or tar archive and uploaded as metadata. This attribute links the experimental data back to the probe set archive file via a synapse ID.","","","True","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"targetPanelSynID","In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. This attribute links experimental data files to the target panel metadata via the synapse ID of that file.","","","True","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"datasetDescription","A Dataset-specific attribute specifying the synID of the folder that contains a wiki write-up of the dataset description.
This wiki content will be surfaced on the ARK Portal frontend site.","","","True","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"DOID","Disease ontology identifier associated with `diagnosis`. Attribute values are applied by ARK Portal data managers.","","","True","","","regex search ^DOID error","string","","","","^DOID" +"BRENDA","BRENDA Tissue and Enzyme Source Ontology ID (BTO) corresponding to the `biospecimenType`. Attribute values are applied by ARK Portal data managers.","","","True","","https://bioportal.bioontology.org/ontologies/BTO","regex search ^BTO error","string","","","","^BTO" +"year","Year (YYYY) in which the paper was published.","","","True","","","regex search [1-2][0-9]{3} error","string","","","","[1-2][0-9]{3}" +"PMID","PubMed(R) Identifier","","","True","","","regex search ^PMID error","string","","","","^PMID" +"ImmPortAccession","Accession to corresponding information in ImmPort.","","","False","","","regex search ^SDY error","string","","","","^SDY" +"cellOntologyID","Cell Ontology CL identifier that best describes a biospecimen used to generate data, e.g., CL:0000084 for T cell.","","","False","","","regex search ^CL: error","string","","","","^CL:" +"PMCID","PubMed Central Identifier, formatted as a compact URI string that will automatically resolve into a URL based on the Synapse bioregistry; e.g., pmc:PMCxxxxxxxx","","","False","","","regex search ^pmc:PMC[0-9]{8} error","string","","","","^pmc:PMC[0-9]{8}" diff --git a/model_contexts/clinical/ark.clinical_context.csv b/model_contexts/clinical/ark.clinical_context.csv index 7d1d8e71..f2946a7b 100644 --- a/model_contexts/clinical/ark.clinical_context.csv +++ b/model_contexts/clinical/ark.clinical_context.csv @@ -1,8 +1,10 @@ -"Attribute","Description","Valid Values","DependsOn","Properties","Required","Parent","DependsOn Component","Source","Validation Rules","columnType" -"Clinical Metadata Template","A template outlining clinical metadata to collect for
study subjects.","","Component, program, project, species, individualID, diagnosis, age, ageUnits, sex, race, ethnicity, height, heightUnits, weight, weightUnits, comorbidities","","","","","","","" -"vitiligo","A valid value of `diagnosis` that triggers conditional dependencies for additional attributes.","","vitiligoPattern, VIDA, VASI, VETI","","","","","","","" -"diabetes","A valid value of `comorbidities` that triggers conditional dependencies for additional attributes.","","diabetesType","","","","","","","" -"psoriasis","A valid value of `diagnosis` that triggers conditional dependencies for additional attributes.","","PASI","","","","","","","" -"dermatomyositis","A valid value of `diagnosis` that triggers conditional dependencies for additional attributes.","","CDASI","","","","","","","" -"AMP RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","" -"AMP AIM","Accelerating Medicines Partnership Autoimmune and Immune-Mediated Diseases. 
Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","" +"Attribute","Description","Valid Values","DependsOn","Required","Parent","Source","Validation Rules","columnType","Format","Minimum","Maximum","Pattern" +"Clinical Metadata Template","A template outlining clinical metadata to collect for study subjects.","","Component, program, project, species, individualID, diagnosis, age, ageUnits, sex, race, ethnicity, height, heightUnits, weight, weightUnits, comorbidities","","","","","","","","","" +"vitiligo","A valid value of `diagnosis` that triggers conditional dependencies for additional attributes.","","vitiligoPattern, VIDA, VASI, VETI","","","","","","","","","" +"diabetes","A valid value of `comorbidities` that triggers conditional dependencies for additional attributes.","","diabetesType","","","","","","","","","" +"psoriasis","A valid value of `diagnosis` that triggers conditional dependencies for additional attributes.","","PASI","","","","","","","","","" +"dermatomyositis","A valid value of `diagnosis` that triggers conditional dependencies for additional attributes.","","CDASI","","","","","","","","","" +"AMP RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","","","" +"AMP AIM","Accelerating Medicines Partnership Autoimmune and Immune-Mediated Diseases. 
Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","","","" +"individualID","","","","TRUE","","","","string","","","","" +"project","","AIM for RA, ELLIPSS, LOCKIT, RA, SLE, STAMP, UMass V-CoRT","","TRUE","","","","string","","","","" diff --git a/model_contexts/clinical/ark.clinical_model.csv b/model_contexts/clinical/ark.clinical_model.csv index fa6e4dd5..ce208164 100644 --- a/model_contexts/clinical/ark.clinical_model.csv +++ b/model_contexts/clinical/ark.clinical_model.csv @@ -1,116 +1,116 @@ -"Attribute","Description","Valid Values","DependsOn","Properties","Required","Parent","DependsOn Component","Source","Validation Rules","columnType" -"Clinical Metadata Template","A template outlining clinical metadata to collect for study subjects.","","Component, program, project, species, individualID, diagnosis, age, ageUnits, sex, race, ethnicity, height, heightUnits, weight, weightUnits, comorbidities","","","","","","","" -"vitiligo","A valid value of `diagnosis` that triggers conditional dependencies for additional attributes.","","vitiligoPattern, VIDA, VASI, VETI","","","","","","","" -"diabetes","A valid value of `comorbidities` that triggers conditional dependencies for additional attributes.","","diabetesType","","","","","","","" -"psoriasis","A valid value of `diagnosis` that triggers conditional dependencies for additional attributes.","","PASI","","","","","","","" -"dermatomyositis","A valid value of `diagnosis` that triggers conditional dependencies for additional attributes.","","CDASI","","","","","","","" -"AMP RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","" -"AMP AIM","Accelerating Medicines Partnership Autoimmune and Immune-Mediated Diseases. 
Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","" -"Component","A high-level attribute for grouping attributes into templates.","","","","True","","","","","string" -"fileFormat","Standard file format name or file extension","bai, bam, bed, bim, csv, czi, docx, dose, erate, fam, fastq, fcs, geojson, h5, h5ad, info, mcd, mtx, parquet, pdf, py, rds, rec, svs, tbi, tgz, tsv, txt, vcf, xls, xlsx, zip","","","True","","","","","string" -"publicationSynID","The synID of the corresponding Synapse entity that stores metadata about the publication. This is used to differentiate publication-specific files, often consisting of level 4 processed data and expanded subject metadata, in a publication dataset that also includes raw or minimally processed files from experimental datasets. This provides an easy way to distinguish and select for the publication-specific data from which the research findings were derived. When this attribute is used to annotate a Dataset it serves as a way to directly link the Dataset entity with the publication metadata stored in Synapse.","","","","False","","","","regex search ^syn[0-9]{8} error","string" -"associatedDataset","The synID of a Dataset entity. This serves to link other Synapse entities to Dataset entities. When used to annotate a publication Dataset this attribute should include the synID for an experimental Datasets from which the publication data was derived. 
Multiple synID can be specified using a comma-delimited list.","","","","False","","","","list like::regex search ^syn[0-9]{8} error","string" -"program","Name of the funding program that supported the generation of data and associated files","AMP AIM, AMP RA/SLE, Community Contribution","","","True","","","","","string" -"project","A sub-level attribute of `program` specifying a research initiative working to investigate particular hypotheses.","AIM for RA, ELLIPSS, LOCKIT, RA, SLE, STAMP, UMass V-CoRT","","","True","","","","list like error","string" -"programPhase","A label noting which AMP RA/SLE program phase generated the data.","I, II","","","True","","","","list like error","string" -"resourceType","High-level classification of the file content","code, experimental data, figure, metadata","","","True","","","","","string" -"dataType","High-level classification of the type of data contained in the file, loosely related to the experimental method or biological entity that is being profiled. Select all that apply using a comma-delimited list, though in most cases only a single label is expected. For multimodal datasets with concomitant profiling of biospecimen include 'multimodal'.","cytometry, epigenomics, genomics, histology, immune repertoire profiling, immunostaining, lipidomics, metabolomics, microbiome, multimodal, proteomics, transcriptomics","","","True","","","","list like error","string" -"dataSubtype","General classification to differentiate between omics profiling modalities. If N/A please select 'none'. Multiple selections can be provided in a comma-delimited list, however this is largely only expected in the context of Datasets and files that contain integrated experimental data spanning multiple types.","bulk, none, pseudobulk, single-cell, single-nucleus, spatial","","","True","","","","list like error","string" -"dataLevel","Level of data processing applied to file. 
Levels refer to pre-defined standards of processing for the given assay.","1, 2, 3, 4, 5","","","True","","","","","string" -"assay","The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.","ASAPSeq, CE-MS, CITESeq, CosMX, CyTOF, GenePS SeqFISH, H&E, LC-MS/MS, NULISA, Olink Explore HT, Olink Flex, Olink Focus, Olink Reveal, Olink Target 48, Olink Target 96, RNASeq, SNP array, SomaScan, VDJSeq, Visium, WES, WGS, Xenium, feature barcode sequencing, flow cytometry, imaging mass cytometry, imaging mass spectrometry, kiloplex, multiplexed ELISA, scRNASeq, scVDJSeq, serial IHC, snATACSeq, snRNASeq","","","True","","","","list like error","string" -"10xProbeSetReference","Name of probe set used in 10x Chromium Flex, Xenium, or Visium data. If custom modified probe set was used the probe reference should be included as metadata accompanying the experimental data files.","Flex Human Transcriptome Probe Set v1.0.1, Flex Human Transcriptome Probe Set v1.1.0, Visium Human Transcriptome v1, Visium Human Transcriptome v2, custom probe set","","","True","","","","","string" -"custom10xProbeSetSynID","If custom modified probe sets were used for collecting 10x Chromium Flex scRNA-seq data, then the probe reference files should be grouped as a zip or tar archive and uploaded as metadata. This attribute links the experimental data back to the probe set archive file via a synapse ID.","","","","True","","","","regex search ^syn[0-9]{8} error","string" -"platform","The specific version (manufacturer, model, etc.) of a technology that is used to carry out a laboratory or computational experiment. Specify where applicable for experimental data files, else enter 'none'. 
In most cases only a single label is expected, however multiple selections can be provided in comma-delimited list where applicable e.g., for 10x Genomics fastq files please specify both the 10x instrument and the sequencing platform.","BD FACSAria Fusion cell sorter, BD FACSAria III, BD FACSCanto, BD FACSCanto II, BD FACSDiscover A8, BD FACSDiscover S8, BD FACSLyric Clinical, BD FACSMelody, BD FACSymphony S6, BD LSRFortessa, Chromium Controller, Chromium GEM-X Single Cell 3' Chip v4, Chromium Next GEM Chip G, Chromium Next GEM Chip H, Chromium Next GEM Chip K, Chromium Next GEM Chip M, Chromium Next GEM Chip Q, Chromium X, Chromium Xo, Chromium iX, CyTOF XT, Cytek Aurora, Cytek Aurora Evo, Fluidigm BioMark, GEM-X Flex Gene Expression Chip, GEM-X OCM 5' Chip, Helios Mass Cytometer, Hyperion, Illumina HiSeq 2500, Illumina HiSeq X Ten, Illumina NextSeq 500, Illumina NovaSeq 6000, Illumina NovaSeq X, Not Applicable, Olink Signature Q100, Sony MA900, Thermo Fisher Attune CytPix, Thermo Fisher Attune NxT, Thermo Fisher Attune Xenith, Visium CytAssist, Xenium, none, unknown","","","True","","","","list like error","string" -"specimenModality","Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens","multispecimen, single specimen, unknown","","","True","","","","","string" -"individualID","Unique identifier assigned to each study participant. For multi-specimen data files provide all IDs in a comma-separated list.","","","","True","","","","#ClinicalMetadataTemplate unique error^^#BiospecimenMetadataTemplate str^^list like error","string" -"biospecimenID","A unique identifier assigned to specimens collected from study participants. 
For multi-specimen data files provide all IDs in a comma-separated list.","","","","True","","","","#BiospecimenMetadataTemplate unique error^^list like error","string" -"parentBiospecimenID","The biospecimenID associated with the originating biospecimen for derived or child biospecimens.","","","","False","","","","","string" -"biospecimenType","A label indicating the biological material collected for experimentation and data collection. Where applicable, provide all types in a comma-separated list.","PBMCs, cell line, fibroblast-like synoviocyte, kidney biopsy, none, plasma, primary cell culture, saliva, salivary gland, serum, skin biopsy, skin swab, stool, suction blister cells, suction blister fluid, synovial fluid, synovial tissue, total leukocytes, urine, uvea, whole blood","","","True","","","","#BiospecimenMetadataTemplate str^^#InVitroBiospecimenMetadataTemplate^^list like error","string" -"primaryCellSource","A label indicating the biological source material from which a primary cell culture was derived.","PBMCs, kidney, pannus-derived dermis, pannus-derived epidermis, salivary gland, synovial tissue, total leukocytes, urine, uvea, whole blood","","","True","","","","","string" -"metadataType","A label further classifying the content of metadata resource.","assay, biospecimen, cell coordinates, data dictionary, file manifest, medication, other, phenotype, protocol, single-cell metadata, target panel, template, tissue microarray map, tissue multiarray map, user manual","","","True","","","","","string" -"codingLanguage","The coding, aka programming, language(s) contained in the file marked with `resourceType = code`. 
Select all that apply.","C, C#, C++, Fortan, Java, Julia, Matlab, Python, R, Ruby, SAS","","","True","","","","","string" -"visitID","Ordinal ID distinguishing different patient visits.","","","","True","","","","","string" -"softwareAndVersion","Relevant software and version used to generate the data file.","BD FACSDiva 8.0.1, Cell Ranger 9.0.1, Cell Ranger ATAC v1.1.0, Cell Ranger v3.0.0, Cell Ranger v3.0.1, Cell Ranger v3.0.2, Cell Ranger v3.1.0, Cell Ranger v4.0.0, Cell Ranger v5.0.0, Cell Ranger v5.0.1, Cell Ranger v6.0.0, Cell Ranger v6.0.1, Cell Ranger v6.0.2, Cell Ranger v6.1.0, Cell Ranger v6.1.1, Cell Ranger v6.1.2, Cell Ranger v7.0.0, Cell Ranger v7.0.1, Cell Ranger v7.1.0, Cell Ranger v7.2.0, Cell Ranger v8.0.0, Cell Ranger v8.0.1, Cell Ranger v9.0.0, Space Ranger 3.0.0, Space Ranger 3.0.1, Space Ranger 3.1.0, Space Ranger 3.1.1, Space Ranger 3.1.2, Space Ranger 3.1.3, demuxlet","","","False","","","","","string" -"targetPanel","A unique or established human-readable name assigned to the panel of targets profiled in the experiment. For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.","","","","True","","","","","string" -"targetPanelSynID","In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. This attribute links experimental data files to the target panel metadata via the synapse ID of that file.","","","","True","","","","regex search ^syn[0-9]{8} error","string" -"targetPanelSize","The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. 
The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).","","","","False","","","","int error","integer" -"nucleicAcidSource","The source of the nucleic acid used as input for sequencing library fragments. Select all that apply, though in most cases only a single label is expected.","BCR mRNA, CRISPR protospacer feature barcode, TCR mRNA, Tn5-accessible gDNA, antigen capture barcode, gDNA, globin-depleted RNA, intracellular protein feature barcode, multiplexing oligo, poly(A) RNA, rRNA-depleted RNA, surface protein feature barcode","","","True","","","","list like error","string" -"biospecimenSubtype","Biospecimen status before sample is processed into a scRNA-seq library. Several scRNA-seq technologies support a variety of sample processing methods which can introduces sources of technical variation.","FFPE tissue, PFA-fixed tissue, cell or tissue lysate, cell suspension, flow-sorted cells, fresh tissue, frozen tissue, nuclei suspension, supernatant","","","False","","","","","string" -"cellRangerOutput","10x Genomics Cell Ranger software output several different counts results and formats, some with different processing applied. This label distinguishes between these types and is particularly helpful when multiple files are uploaded with the sample name, e.g., barcodes.tsv.gz","Not Applicable, filtered MEX, filtered_feature_bc_matrix, filtered_peak_bc_matrix, raw MEX, raw_feature_bc_matrix, raw_peak_bc_matrix","","","True","","","","","string" -"alignmentReference","The genomic/transcriptomic reference used for performing read alignment against.","10x Cell Ranger Human GRCh38 2020-A, 10x Cell Ranger Human GRCh38 2024-A, GRCh38, modified GRCh38, unknown, vdj_GRCh38_alts_ensembl-4.0.0","","","True","","","","","string" -"libraryPrepMethod","Sequencing library preparation method or kit used to create the library. 
If no commercially available kit was used, please select 'in-house library prep'.","10x Chromium Fixed RNA Human Transcriptome, 10x Chromium GEM-X Single Cell 3' v4, 10x Chromium GEM-X Single Cell 5' v3, 10x Chromium Next GEM Single Cell 3', 10x Chromium Next GEM Single Cell 3' 3.1, 10x Chromium Next GEM Single Cell 5' v1.1, 10x Chromium Next GEM Single Cell 5' v2, 10x Chromium Next GEM Single Cell ATAC v2, 10x Chromium Single Cell Human BCR, 10x Chromium Single Cell Human TCR, 10x GEM-X Flex Gene Expression Human, 10x GEM-X Universal 5' Gene Expression v3, CEL-Seq2, Chromium Next GEM Single Cell ATAC v1.1, Fluidigm C1 HT, NEBNext Human Immune Sequencing Kit, NEBNext Ultra II Directional RNA Library, Nextera XT, Nextera XT DNA, QIAseq miRNA Library, SMART-Seq Human BCR with UMI, SMART-Seq Human TCR with UMI, SMART-Seq v4 Ultra Low Input RNA, SMARTer Stranded Total RNA v2, Takara Human BCR profiling for Illumina, Takara Human TCR profiling for Illumina, Takara Human TCRv2 profiling for Illumina, Takara Human scTCR profiling for Illumina, TruSeq Stranded mRNA, custom DASH-treatment, in-house library prep","","","True","","","","","string" -"datasetType","High-level classification of dataset entity distinguishing between datasets compiled for a specific publication or as a general data resource.","experimental, publication","","","True","","","","","string" -"acknowledgmentStatement","A Dataset-specific attribute specifying the path to the wiki subpage within the ARK Portal - backend project that contains the acknowledgement statement that must be included in publications using data from the given dataset as a stipulation of the conditions of use.","syn26710600/wiki/619685","","","True","","","","","string" -"datasetDescription","A Dataset-specific attribute specifying the synID of the folder that contains a wiki write-up of the dataset description. 
This wiki content will be surfaced on the ARK Portal frontend site.","","","","True","","","","regex search ^syn[0-9]{8} error","string" -"ARKRelease","A Dataset-specific attribute specifying the ARK Portal release in which this dataset was first made available to the public.","1.0, 2.0, 2024.06.R1, 2024.07.R1, 2024.08.R1, 2024.09.R1, 2024.10.R1, 2024.12.R1, 2025.01.R1, 2025.02.R1, 2025.03.R1, 2025.04.R1, 2025.05.R1, 2025.06.R1, 2025.07.R1, 2025.08.R1, 2025.09.R1, 2025.10.R1, 2025.11.R1, 2025.12.R1","","","True","","","","","string" -"RObjectClass","Rds files store R objects, one per file. This label details the class of the R object saved to the Rds file or other similar file types.","ROCR prediction.object, Seurat object, SummarizedExperiment, Symphony reference, data.frame, list, matrix, sparse matrix, vector","","","False","","","","","string" -"diagnosis","A high-level classifier indicating the disease status of an individual.","At-Risk RA, Not Applicable, OA, RA, SLE, Sjogren's disease, control, cutaneous lupus erythematosus, dermatomyositis, discoid lupus erythematosus, lupus nephritis, psoriasis, psoriatic arthritis, scleroderma, unknown, vitiligo","","","True","","","","list like error","string" -"DOID","Disease ontology identifier associated with `diagnosis`. Attribute values are applied by ARK Portal data managers.","","","","True","","","","regex search ^DOID error","string" -"libraryID","A library label or name, unique within an experiment, used to distinguish sequencing libraries.","","","","True","","","","","string" -"associatedCodeURL","A URL to the repository where associated code is available.","","","","False","","","","","string" -"FMAID","Functional Model of Anatomy ontology ID corresponding to the anatomical site and origin of the biospecimen. 
This attribute is assigned by ARK Portal data managers.","","","","True","","","https://bioportal.bioontology.org/ontologies/FMA/?p=summary","int error","integer" -"BRENDA","BRENDA Tissue and Enzyme Source Ontology ID (BTO) corresponding to the `biospecimenType`. Attribute values are applied by ARK Portal data managers.","","","","True","","","https://bioportal.bioontology.org/ontologies/BTO","regex search ^BTO error","string" -"skinSiteStatus","Disease manifestation status of skin biospecimen.","healthy control, lesional, lesional proximal, non-lesional","","","True","","","","","string" -"salivaCollectionProcedure","Classification of saliva collection procedure. This is a conditionally dependent attribute triggered for saliva `biospecimenType`.","stimulated, unstimulated","","","True","","","","","string" -"synovialCollectionProcedure","Classification of procedure for synovial tissue collection.","arthroplasty, biopsy, synovectomy, unknown","","","True","","","","","string" -"publicationType","General classification of publication.","correction, peer-reviewed, pre-print","","","True","","","","","string" -"title","Title of the publication.","","","","True","","","","","string" -"journal","Journal in which the publication was released","","","","True","","","","","string" -"year","Year (YYYY) in which the paper was published.","","","","True","","","","regex search [1-2][0-9]{3} error","string" -"publicationDate","The publication date extracted from PubMed database","","","","True","","","","","string" -"PMID","PubMed(R) Identifier","","","","True","","","","regex search ^PMID error","string" -"DOI","Digital object identifier","","","","True","","","","","string" -"anatomicalSite","The anatomical site, i.e., location on or within the body, from which the biospecimen was collected. 
This attribute is required depending on the value selected for biospecimenType.","left 2nd MCP joint, left ankle joint, left hip joint, left knee joint, left wrist joint, other site, right 1st MTP joint, right 2nd MCP joint, right 2nd MTP joint, right 3rd MCP joint, right ankle joint, right hip joint, right knee joint, right wrist joint, unknown","","","True","","","","","string" -"vitiligoPattern","A high-level classification of vitiligo based on the distribution and patterning of lesions across the body.","mixed, non-segmental, segmental, unclassified","","","False","","","","","string" -"vitiligoPhenotype","Classification of vitiligo lesions which correlate with autoimmune activity and result in specific skin and depigmentation manifestations at the lesion site. If N/A please select 'none'. Multiple selections can be provided in a comma-delimited list.","active, confetti, inflammatory, none, trichrome","","","True","","","","list like error","string" -"psoriasisType","General type classification of psoriasis disease manifestation.","erythrodermic, guttate, inverse, plaque, pustular","","","False","","","","","string" -"ageDiagnosis","Age at which subject was diagnosed with `diagnosis`. If providing this value be sure the unit matches that used for `age`.","","","","False","","","","num error","number" -"age","Age at which subject was enrolled in study or age at corresponding visit and data collection event. 
If value unknown, enter '-1'.","","","","True","","","","num error","number" -"ageUnits","The unit of measure used for `ageEnrollment` and `ageDiagnosis`","months, years","","","True","","","","","string" -"sex","A textual description of a person's sex at birth.","female, intersex, male, unknown","","","True","","","","","string" -"height","Standing height of subject.","","","","True","","","","num error","number" -"race","A textual description of a person's race.","American Indian or Alaska Native, Asian, Black or African American, Hispanic, Mixed Race, Native Hawaiian or Other Pacific Islander, White, other, unknown","","","False","","","","","string" -"ethnicity","The ethnicity of a person.","Hispanic or Latino, Not Hispanic or Latino, unknown","","","False","","","","","string" -"heightUnits","Unit of measure of value provided for `height`.","centimeters, feet, inches, meters","","","True","","","","","string" -"weight","Weight of subject. If value unknown, enter '-1'.","","","","True","","","","num error","number" -"weightUnits","Abbreviated unit of measure of value provided for `weight`.","g, kg, lb, oz","","","True","","","","","string" -"comorbidities","Any diseases or medical condition that is simultaneously present in addition to `diagnosis`.","Hashimoto's Thyroiditis, autoimmune thyroid disease, cardiovascular disease, diabetes, inflammatory bowel disease, multiple sclerosis, other, psoriasis, psoriatic arthritis, pulmonary disease, rheumatoid arthritis, systemic lupus erythematosus","","","False","","","","list like error","string" -"diabetesType","Type of diabetes mellitus.","gestational, type 1, type 2, unknown","","","False","","","","","string" -"VIDA","Vitiligo Disease Activity Score, a six-point scale that evaluates the activity of vitiligo","","","","False","","","","","string" -"VASI","Total body Vitiligo Area Severity Index, a measure of the percentage of vitiligo involvement across the body calculated in terms of hand 
units.","","","","False","","","","","string" -"VETI","Vitiligo Extent Tensity Index measures the extent of vitiligo by a numerical score and combines analysis of extensity and severity of vitiligo to produce a constant and reproducible number.","","","","False","","","https://doi.org/10.5826/dpc.0404a18","num error","number" -"PASI","Psoriasis Area and Severity Index, is a measurement of the discoloration, thickness, scaling, and coverage of psoriasis plaques.","","","","False","","","","","string" -"CDASI","Cutaneous Dermatomyositis Disease Area and Severity Index, is a measurement used to characterize cutaneous dermatomyositis severity.","","","","False","","","","","string" -"percentCellViability","A measure of the proportion of viable cells within a cell suspension. Scale is 0-100.","","","","True","","","","inRange 50 100 error","integer" -"inputCellCount","An estimate of the number of cells expected to be sequenced in a library. Software that process single-cell sequencing data often include options for users to specify this value to improve processing results.","","","","True","","","","int error","integer" -"totalReads","Total number of reads sequenced from the library.","","","","True","","","","int error","integer" -"sequencingSaturation","A measure of the fraction of library complexity that was sequenced in a library. This metric quantifies the fraction of reads originating from an already-observed UMI. More specifically, this is the fraction of confidently mapped, valid cell-barcode, valid UMI reads that are non-unique. 
Scale is 0-1.","","","","False","","","https://kb.10xgenomics.com/hc/en-us/articles/115005062366-What-is-sequencing-saturation","inRange 0 1 error","number" -"ImmPortAccession","Accession to corresponding information in ImmPort.","","","","False","","","","regex search ^SDY error","string" -"FACSPopulation","A description of the marker gating strategy used to derive the population cells with FACS.","","","","True","","","","","string" -"cellOntologyID","Cell Ontology CL identifier that best describes a biopsecimen used to generate data, e.g., CL:0000084 for T cell.","","","","False","","","","regex search ^CL: error","string" -"cellType","The cell type name from Cell Ontology for the corresponding CL identifier.","","","","False","","","","","string" -"userDefinedCellType","User-defined label of a cell type. Contributors are provided this option to use preferred or custom cell type labels that may vary from options and standards set by Cell Ontology.","","","","False","","","","","string" -"krennInflammatory","A standardized, semi-quantitative measure of the degree of inflammatory infiltrate in synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"krennLining","A standardized, semi-quantitative measure of the degree of hyperplasia/enlargement of the synovial lining layer of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. 
If value unknown, use '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"krennStroma","A standardized, semi-quantitative measure of stromal cell density of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"krennSynovitisScore","The Krenn Synovitis Score (KSS) is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"datasetStatus","A categorical label indicating the status of an ARK Portal dataset. 
This is applied to improve downstream management of datasets as well as various ETL workflows.","deprecated, released, test, under peer review, unreleased","","","True","","","","","string" -"sampleProcessingBatch","A label indicating batching of sample processing or preparation that occurs prior to data collection.","","","","False","","","","","string" -"dataCollectionBatch","A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.","","","","False","","","","","string" -"metadataStandards","Metadata standards used to generate the metadata","ARK data model, user-defined","","","False","","","","","string" -"skinSunExposure","For skin-based biospecimen, this attribute indicates whether the sample was collected from an anatomical site that does or does not receive routine sun exposure.","not sun exposed, sun exposed","","","False","","","","","string" -"sequencingSpikeIn","Pre-made sequencing libraries may be added to your sequencing run to improve sequencing results. If such a 'spike-in' library was used, this attribute specifies the name of the library and the percentage of the spike-in library used in the sequencing run.","","","","False","","","","","string" -"librarySpikeIn","Pre-made collections of nucleic acid fragments can be added to samples during the library construction process to improve downstream quantification and statistical analyses. If such a 'spike-in' was used, this attribute specifies the name and manufacturer of the spike-in.","","","","False","","","","","string" -"readLength","The number of base pairs (bp) sequenced for reads in a fastq file.","","","","False","","","","int error","integer" -"plateID","An identifier assigned to a multi-well plate. Certain data types and assays profile samples using multi-well plates. 
Knowing which samples were profiled on each plate is important for establishing sample provenance, finding the right data files for a specific set of samples, as well as downstream exploratory data analysis and QC work particularly regarding identification and correction of batch effects.","","","","False","","","","","string" -"PMCID","Pubmed Central Identifier, formatted as a compact URI string that will automatically resolve into a URL based on the Synapse bioregistry; e.g., pmc:PMCxxxxxxxx","","","","False","","","","regex search ^pmc:PMC[0-9]{8} error","string" -"processedDataType","A label used for file annotations to provide a brief description of the processed data file.","barcode counts, differential expression results, epigenomic peaks, gene counts","","","False","","","","list like error","string" -"eventCount","The total number of events detected and captured in the corresponding FCS file. In cytometry, an event corresponds to particles detected and measured by the instrument. This number corresponds to the number of rows in the FCS file and is expected to be a whole integer number.","","","","False","","","","int error","integer" -"notes","Please use this attribute to captured pertinent details not otherwise captured. This is an unstructured text entry, please be concise.","","","","False","","","","","string" -"species","The genus species of sample or subject origin.","Homo sapiens","","","True","","","","","string" -"treatment","A short descriptive label indicating what experimental treatments have been applied to a biospecimen before data capture. Please include the word 'control' to indicate specific treatments that are intended to serve as a control group.","","","","True","","","","","string" -"treatmentTimepoint","Where applicable, specify the timepoint relative to treatment to distinguish different sets of samples that have undergone the same treatment but for different lengths of time. 
REQUIRED: please specify the unit of time, e.g., secs, mins, hrs, days, etc.","","","","False","","","","","string" -"slideID","A distinct label or name, unique within an experiment, assigned to an imaging slides.","","","","True","","","","","string" -"altSampleID","An alternate identifier for a sample. With some assays there can be default or alternate sample identifiers entered into the data collection software. If you will be uploading data collected with an alternate identifier please provide that here. In some cases, the ARK BDM team will use this field to capture original but out-dated identifiers for samples.","","","","False","","","","","string" -"sampleCollectionBatch","A label indicating batching of sample collection or experiment execution that occurs prior to data collection.","","","","False","","","","","string" -"associatedAccession","This is a File and Dataset annotation attribute indicating additional accessions (i.e., unique identifiers) associated with the data when the data has also been submitted to or can be found in other repositories such as GEO, SRA, dbGaP, etc.","","","","False","","","","list like error","string" +"Attribute","Description","Valid Values","DependsOn","Required","Parent","Source","Validation Rules","columnType","Format","Minimum","Maximum","Pattern" +"Clinical Metadata Template","A template outlining clinical metadata to collect for study subjects.","","Component, program, project, species, individualID, diagnosis, age, ageUnits, sex, race, ethnicity, height, heightUnits, weight, weightUnits, comorbidities","","","","","","","","","" +"vitiligo","A valid value of `diagnosis` that triggers conditional dependencies for additional attributes.","","vitiligoPattern, VIDA, VASI, VETI","","","","","","","","","" +"diabetes","A valid value of `comorbidities` that triggers conditional dependencies for additional attributes.","","diabetesType","","","","","","","","","" +"psoriasis","A valid value of `diagnosis` that triggers 
conditional dependencies for additional attributes.","","PASI","","","","","","","","","" +"dermatomyositis","A valid value of `diagnosis` that triggers conditional dependencies for additional attributes.","","CDASI","","","","","","","","","" +"AMP RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","","","" +"AMP AIM","Accelerating Medicines Partnership Autoimmune and Immune-Mediated Diseases. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","","","" +"individualID","Unique identifier assigned to each study participant. For multi-specimen data files provide all IDs in a comma-separated list.","","","TRUE","","","","string","","","","" +"project","A sub-level attribute of `program` specifying a research initiative working to investigate particular hypotheses.","AIM for RA, ELLIPSS, LOCKIT, RA, SLE, STAMP, UMass V-CoRT","","TRUE","","","","string","","","","" +"Component","A high-level attribute for grouping attributes into templates.","","","True","","","","string","","","","" +"fileFormat","Standard file format name or file extension","bai, bam, bed, bim, csv, czi, docx, dose, erate, fam, fastq, fcs, geojson, h5, h5ad, info, mcd, mtx, parquet, pdf, py, rds, rec, svs, tbi, tgz, tsv, txt, vcf, xls, xlsx, zip","","True","","","","string","","","","" +"program","Name of the funding program that supported the generation of data and associated files","AMP AIM, AMP RA/SLE, Community Contribution","","True","","","","string","","","","" +"programPhase","A label noting which AMP RA/SLE program phase generated the data.","I, II","","True","","","list like error","string_list","","","","" +"resourceType","High-level classification of the file content","code, experimental data, figure, 
metadata","","True","","","","string","","","","" +"dataType","High-level classification of the type of data contained in the file, loosely related to the experimental method or biological entity that is being profiled. Select all that apply using a comma-delimited list, though in most cases only a single label is expected. For multimodal datasets with concomitant profiling of biospecimen include 'multimodal'.","cytometry, epigenomics, genomics, histology, immune repertoire profiling, immunostaining, lipidomics, metabolomics, microbiome, multimodal, proteomics, transcriptomics","","True","","","list like error","string_list","","","","" +"dataSubtype","General classification to differentiate between omics profiling modalities. If N/A please select 'none'. Multiple selections can be provided in a comma-delimited list, however this is largely only expected in the context of Datasets and files that contain integrated experimental data spanning multiple types.","bulk, none, pseudobulk, single-cell, single-nucleus, spatial","","True","","","list like error","string_list","","","","" +"dataLevel","Level of data processing applied to file. Levels refer to pre-defined standards of processing for the given assay.","1, 2, 3, 4, 5","","True","","","","string","","","","" +"assay","The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. 
e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.","ASAPSeq, CE-MS, CITESeq, CosMX, CyTOF, GenePS SeqFISH, H&E, LC-MS/MS, NULISA, Olink Explore HT, Olink Flex, Olink Focus, Olink Reveal, Olink Target 48, Olink Target 96, RNASeq, SNP array, SomaScan, VDJSeq, Visium, WES, WGS, Xenium, feature barcode sequencing, flow cytometry, imaging mass cytometry, imaging mass spectrometry, kiloplex, multiplexed ELISA, scRNASeq, scVDJSeq, serial IHC, snATACSeq, snRNASeq","","True","","","list like error","string_list","","","","" +"10xProbeSetReference","Name of probe set used in 10x Chromium Flex, Xenium, or Visium data. If custom modified probe set was used the probe reference should be included as metadata accompanying the experimental data files.","Flex Human Transcriptome Probe Set v1.0.1, Flex Human Transcriptome Probe Set v1.1.0, Visium Human Transcriptome v1, Visium Human Transcriptome v2, custom probe set","","True","","","","string","","","","" +"platform","The specific version (manufacturer, model, etc.) of a technology that is used to carry out a laboratory or computational experiment. Specify where applicable for experimental data files, else enter 'none'. 
In most cases only a single label is expected, however multiple selections can be provided in a comma-delimited list where applicable, e.g., for 10x Genomics fastq files please specify both the 10x instrument and the sequencing platform.","BD FACSAria Fusion cell sorter, BD FACSAria III, BD FACSCanto, BD FACSCanto II, BD FACSDiscover A8, BD FACSDiscover S8, BD FACSLyric Clinical, BD FACSMelody, BD FACSymphony S6, BD LSRFortessa, Chromium Controller, Chromium GEM-X Single Cell 3' Chip v4, Chromium Next GEM Chip G, Chromium Next GEM Chip H, Chromium Next GEM Chip K, Chromium Next GEM Chip M, Chromium Next GEM Chip Q, Chromium X, Chromium Xo, Chromium iX, CyTOF XT, Cytek Aurora, Cytek Aurora Evo, Fluidigm BioMark, GEM-X Flex Gene Expression Chip, GEM-X OCM 5' Chip, Helios Mass Cytometer, Hyperion, Illumina HiSeq 2500, Illumina HiSeq X Ten, Illumina NextSeq 500, Illumina NovaSeq 6000, Illumina NovaSeq X, Not Applicable, Olink Signature Q100, Sony MA900, Thermo Fisher Attune CytPix, Thermo Fisher Attune NxT, Thermo Fisher Attune Xenith, Visium CytAssist, Xenium, none, unknown","","True","","","list like error","string_list","","","","" +"specimenModality","Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens","multispecimen, single specimen, unknown","","True","","","","string","","","","" +"biospecimenID","A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.","","","True","","","#BiospecimenMetadataTemplate unique error^^list like error","string_list","","","","" +"parentBiospecimenID","The biospecimenID associated with the originating biospecimen for derived or child biospecimens.","","","False","","","","string","","","","" +"biospecimenType","A label indicating the biological material collected for experimentation and data collection.
Where applicable, provide all types in a comma-separated list.","PBMCs, cell line, fibroblast-like synoviocyte, kidney biopsy, none, plasma, primary cell culture, saliva, salivary gland, serum, skin biopsy, skin swab, stool, suction blister cells, suction blister fluid, synovial fluid, synovial tissue, total leukocytes, urine, uvea, whole blood","","True","","","#BiospecimenMetadataTemplate str^^#InVitroBiospecimenMetadataTemplate^^list like error","string_list","","","","" +"primaryCellSource","A label indicating the biological source material from which a primary cell culture was derived.","PBMCs, kidney, pannus-derived dermis, pannus-derived epidermis, salivary gland, synovial tissue, total leukocytes, urine, uvea, whole blood","","True","","","","string","","","","" +"metadataType","A label further classifying the content of metadata resource.","assay, biospecimen, cell coordinates, data dictionary, file manifest, medication, other, phenotype, protocol, single-cell metadata, target panel, template, tissue microarray map, tissue multiarray map, user manual","","True","","","","string","","","","" +"codingLanguage","The coding, aka programming, language(s) contained in the file marked with `resourceType = code`. 
Select all that apply.","C, C#, C++, Fortan, Java, Julia, Matlab, Python, R, Ruby, SAS","","True","","","","string","","","","" +"visitID","Ordinal ID distinguishing different patient visits.","","","True","","","","string","","","","" +"softwareAndVersion","Relevant software and version used to generate the data file.","BD FACSDiva 8.0.1, Cell Ranger 9.0.1, Cell Ranger ATAC v1.1.0, Cell Ranger v3.0.0, Cell Ranger v3.0.1, Cell Ranger v3.0.2, Cell Ranger v3.1.0, Cell Ranger v4.0.0, Cell Ranger v5.0.0, Cell Ranger v5.0.1, Cell Ranger v6.0.0, Cell Ranger v6.0.1, Cell Ranger v6.0.2, Cell Ranger v6.1.0, Cell Ranger v6.1.1, Cell Ranger v6.1.2, Cell Ranger v7.0.0, Cell Ranger v7.0.1, Cell Ranger v7.1.0, Cell Ranger v7.2.0, Cell Ranger v8.0.0, Cell Ranger v8.0.1, Cell Ranger v9.0.0, Space Ranger 3.0.0, Space Ranger 3.0.1, Space Ranger 3.1.0, Space Ranger 3.1.1, Space Ranger 3.1.2, Space Ranger 3.1.3, demuxlet","","False","","","","string","","","","" +"targetPanel","A unique or established human-readable name assigned to the panel of targets profiled in the experiment. For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.","","","True","","","","string","","","","" +"targetPanelSize","The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).","","","False","","","int error","integer","","","","" +"nucleicAcidSource","The source of the nucleic acid used as input for sequencing library fragments. 
Select all that apply, though in most cases only a single label is expected.","BCR mRNA, CRISPR protospacer feature barcode, TCR mRNA, Tn5-accessible gDNA, antigen capture barcode, gDNA, globin-depleted RNA, intracellular protein feature barcode, multiplexing oligo, poly(A) RNA, rRNA-depleted RNA, surface protein feature barcode","","True","","","list like error","string_list","","","","" +"biospecimenSubtype","Biospecimen status before sample is processed into a scRNA-seq library. Several scRNA-seq technologies support a variety of sample processing methods which can introduce sources of technical variation.","FFPE tissue, PFA-fixed tissue, cell or tissue lysate, cell suspension, flow-sorted cells, fresh tissue, frozen tissue, nuclei suspension, supernatant","","False","","","","string","","","","" +"cellRangerOutput","10x Genomics Cell Ranger software outputs several different count results and formats, some with different processing applied. This label distinguishes between these types and is particularly helpful when multiple files are uploaded with the sample name, e.g., barcodes.tsv.gz","Not Applicable, filtered MEX, filtered_feature_bc_matrix, filtered_peak_bc_matrix, raw MEX, raw_feature_bc_matrix, raw_peak_bc_matrix","","True","","","","string","","","","" +"alignmentReference","The genomic/transcriptomic reference used for performing read alignment against.","10x Cell Ranger Human GRCh38 2020-A, 10x Cell Ranger Human GRCh38 2024-A, GRCh38, modified GRCh38, unknown, vdj_GRCh38_alts_ensembl-4.0.0","","True","","","","string","","","","" +"libraryPrepMethod","Sequencing library preparation method or kit used to create the library.
If no commercially available kit was used, please select 'in-house library prep'.","10x Chromium Fixed RNA Human Transcriptome, 10x Chromium GEM-X Single Cell 3' v4, 10x Chromium GEM-X Single Cell 5' v3, 10x Chromium Next GEM Single Cell 3', 10x Chromium Next GEM Single Cell 3' 3.1, 10x Chromium Next GEM Single Cell 5' v1.1, 10x Chromium Next GEM Single Cell 5' v2, 10x Chromium Next GEM Single Cell ATAC v2, 10x Chromium Single Cell Human BCR, 10x Chromium Single Cell Human TCR, 10x GEM-X Flex Gene Expression Human, 10x GEM-X Universal 5' Gene Expression v3, CEL-Seq2, Chromium Next GEM Single Cell ATAC v1.1, Fluidigm C1 HT, NEBNext Human Immune Sequencing Kit, NEBNext Ultra II Directional RNA Library, Nextera XT, Nextera XT DNA, QIAseq miRNA Library, SMART-Seq Human BCR with UMI, SMART-Seq Human TCR with UMI, SMART-Seq v4 Ultra Low Input RNA, SMARTer Stranded Total RNA v2, Takara Human BCR profiling for Illumina, Takara Human TCR profiling for Illumina, Takara Human TCRv2 profiling for Illumina, Takara Human scTCR profiling for Illumina, TruSeq Stranded mRNA, custom DASH-treatment, in-house library prep","","True","","","","string","","","","" +"datasetType","High-level classification of dataset entity distinguishing between datasets compiled for a specific publication or as a general data resource.","experimental, publication","","True","","","","string","","","","" +"acknowledgmentStatement","A Dataset-specific attribute specifying the path to the wiki subpage within the ARK Portal - backend project that contains the acknowledgement statement that must be included in publications using data from the given dataset as a stipulation of the conditions of use.","syn26710600/wiki/619685","","True","","","","string","","","","" +"ARKRelease","A Dataset-specific attribute specifying the ARK Portal release in which this dataset was first made available to the public.","1.0, 2.0, 2024.06.R1, 2024.07.R1, 2024.08.R1, 2024.09.R1, 2024.10.R1, 2024.12.R1, 2025.01.R1, 2025.02.R1, 
2025.03.R1, 2025.04.R1, 2025.05.R1, 2025.06.R1, 2025.07.R1, 2025.08.R1, 2025.09.R1, 2025.10.R1, 2025.11.R1, 2025.12.R1","","True","","","","string","","","","" +"RObjectClass","Rds files store R objects, one per file. This label details the class of the R object saved to the Rds file or other similar file types.","ROCR prediction.object, Seurat object, SummarizedExperiment, Symphony reference, data.frame, list, matrix, sparse matrix, vector","","False","","","","string","","","","" +"diagnosis","A high-level classifier indicating the disease status of an individual.","At-Risk RA, Not Applicable, OA, RA, SLE, Sjogren's disease, control, cutaneous lupus erythematosus, dermatomyositis, discoid lupus erythematosus, lupus nephritis, psoriasis, psoriatic arthritis, scleroderma, unknown, vitiligo","","True","","","list like error","string_list","","","","" +"libraryID","A library label or name, unique within an experiment, used to distinguish sequencing libraries.","","","True","","","","string","","","","" +"associatedCodeURL","A URL to the repository where associated code is available.","","","False","","","","string","uri","","","" +"FMAID","Foundational Model of Anatomy ontology ID corresponding to the anatomical site and origin of the biospecimen. This attribute is assigned by ARK Portal data managers.","","","True","","https://bioportal.bioontology.org/ontologies/FMA/?p=summary","int error","integer","","","","" +"skinSiteStatus","Disease manifestation status of skin biospecimen.","healthy control, lesional, lesional proximal, non-lesional","","True","","","","string","","","","" +"salivaCollectionProcedure","Classification of saliva collection procedure. 
This is a conditionally dependent attribute triggered for saliva `biospecimenType`.","stimulated, unstimulated","","True","","","","string","","","","" +"synovialCollectionProcedure","Classification of procedure for synovial tissue collection.","arthroplasty, biopsy, synovectomy, unknown","","True","","","","string","","","","" +"publicationType","General classification of publication.","correction, peer-reviewed, pre-print","","True","","","","string","","","","" +"title","Title of the publication.","","","True","","","","string","","","","" +"journal","Journal in which the publication was released","","","True","","","","string","","","","" +"publicationDate","The publication date extracted from PubMed database","","","True","","","","string","","","","" +"DOI","Digital object identifier","","","True","","","","string","uri-reference","","","" +"anatomicalSite","The anatomical site, i.e., location on or within the body, from which the biospecimen was collected. This attribute is required depending on the value selected for biospecimenType.","left 2nd MCP joint, left ankle joint, left hip joint, left knee joint, left wrist joint, other site, right 1st MTP joint, right 2nd MCP joint, right 2nd MTP joint, right 3rd MCP joint, right ankle joint, right hip joint, right knee joint, right wrist joint, unknown","","True","","","","string","","","","" +"vitiligoPattern","A high-level classification of vitiligo based on the distribution and patterning of lesions across the body.","mixed, non-segmental, segmental, unclassified","","False","","","","string","","","","" +"vitiligoPhenotype","Classification of vitiligo lesions which correlate with autoimmune activity and result in specific skin and depigmentation manifestations at the lesion site. If N/A please select 'none'. 
Multiple selections can be provided in a comma-delimited list.","active, confetti, inflammatory, none, trichrome","","True","","","list like error","string_list","","","","" +"psoriasisType","General type classification of psoriasis disease manifestation.","erythrodermic, guttate, inverse, plaque, pustular","","False","","","","string","","","","" +"ageDiagnosis","Age at which subject was diagnosed with `diagnosis`. If providing this value be sure the unit matches that used for `age`.","","","False","","","num error","number","","","","" +"age","Age at which subject was enrolled in study or age at corresponding visit and data collection event. If value unknown, enter '-1'.","","","True","","","num error","number","","","","" +"ageUnits","The unit of measure used for `ageEnrollment` and `ageDiagnosis`","months, years","","True","","","","string","","","","" +"sex","A textual description of a person's sex at birth.","female, intersex, male, unknown","","True","","","","string","","","","" +"height","Standing height of subject.","","","True","","","num error","number","","","","" +"race","A textual description of a person's race.","American Indian or Alaska Native, Asian, Black or African American, Hispanic, Mixed Race, Native Hawaiian or Other Pacific Islander, White, other, unknown","","False","","","","string","","","","" +"ethnicity","The ethnicity of a person.","Hispanic or Latino, Not Hispanic or Latino, unknown","","False","","","","string","","","","" +"heightUnits","Unit of measure of value provided for `height`.","centimeters, feet, inches, meters","","True","","","","string","","","","" +"weight","Weight of subject. 
If value unknown, enter '-1'.","","","True","","","num error","number","","","","" +"weightUnits","Abbreviated unit of measure of value provided for `weight`.","g, kg, lb, oz","","True","","","","string","","","","" +"comorbidities","Any diseases or medical condition that is simultaneously present in addition to `diagnosis`.","Hashimoto's Thyroiditis, autoimmune thyroid disease, cardiovascular disease, diabetes, inflammatory bowel disease, multiple sclerosis, other, psoriasis, psoriatic arthritis, pulmonary disease, rheumatoid arthritis, systemic lupus erythematosus","","False","","","list like error","string_list","","","","" +"diabetesType","Type of diabetes mellitus.","gestational, type 1, type 2, unknown","","False","","","","string","","","","" +"VIDA","Vitiligo Disease Activity Score, a six-point scale that evaluates the activity of vitiligo","","","False","","","","string","","","","" +"VASI","Total body Vitiligo Area Severity Index, a measure of the percentage of vitiligo involvement across the body calculated in terms of hand units.","","","False","","","","string","","","","" +"VETI","Vitiligo Extent Tensity Index measures the extent of vitiligo by a numerical score and combines analysis of extensity and severity of vitiligo to produce a constant and reproducible number.","","","False","","https://doi.org/10.5826/dpc.0404a18","num error","number","","","","" +"PASI","Psoriasis Area and Severity Index, is a measurement of the discoloration, thickness, scaling, and coverage of psoriasis plaques.","","","False","","","","string","","","","" +"CDASI","Cutaneous Dermatomyositis Disease Area and Severity Index, is a measurement used to characterize cutaneous dermatomyositis severity.","","","False","","","","string","","","","" +"inputCellCount","An estimate of the number of cells expected to be sequenced in a library. 
Software that processes single-cell sequencing data often includes options for users to specify this value to improve processing results.","","","True","","","int error","integer","","","","" +"totalReads","Total number of reads sequenced from the library.","","","True","","","int error","integer","","","","" +"FACSPopulation","A description of the marker gating strategy used to derive the population cells with FACS.","","","True","","","","string","","","","" +"cellType","The cell type name from Cell Ontology for the corresponding CL identifier.","","","False","","","","string","","","","" +"userDefinedCellType","User-defined label of a cell type. Contributors are provided this option to use preferred or custom cell type labels that may vary from options and standards set by Cell Ontology.","","","False","","","","string","","","","" +"krennInflammatory","A standardized, semi-quantitative measure of the degree of inflammatory infiltrate in synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"krennLining","A standardized, semi-quantitative measure of the degree of hyperplasia/enlargement of the synovial lining layer of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"krennStroma","A standardized, semi-quantitative measure of stromal cell density of synovial specimen. 
This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"krennSynovitisScore","The Krenn Synovitis Score (KSS) is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"datasetStatus","A categorical label indicating the status of an ARK Portal dataset. This is applied to improve downstream management of datasets as well as various ETL workflows.","deprecated, released, test, under peer review, unreleased","","True","","","","string","","","","" +"sampleProcessingBatch","A label indicating batching of sample processing or preparation that occurs prior to data collection.","","","False","","","","string","","","","" +"dataCollectionBatch","A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.","","","False","","","","string","","","","" +"metadataStandards","Metadata standards used to generate the metadata","ARK data model, user-defined","","False","","","","string","","","","" +"skinSunExposure","For skin-based biospecimen, this attribute indicates whether the sample was collected from an anatomical site that does or does not receive routine sun exposure.","not sun exposed, sun exposed","","False","","","","string","","","","" +"sequencingSpikeIn","Pre-made sequencing libraries may be added to your sequencing run to improve sequencing results. 
If such a 'spike-in' library was used, this attribute specifies the name of the library and the percentage of the spike-in library used in the sequencing run.","","","False","","","","string","","","","" +"librarySpikeIn","Pre-made collections of nucleic acid fragments can be added to samples during the library construction process to improve downstream quantification and statistical analyses. If such a 'spike-in' was used, this attribute specifies the name and manufacturer of the spike-in.","","","False","","","","string","","","","" +"readLength","The number of base pairs (bp) sequenced for reads in a fastq file.","","","False","","","int error","integer","","","","" +"plateID","An identifier assigned to a multi-well plate. Certain data types and assays profile samples using multi-well plates. Knowing which samples were profiled on each plate is important for establishing sample provenance, finding the right data files for a specific set of samples, as well as downstream exploratory data analysis and QC work particularly regarding identification and correction of batch effects.","","","False","","","","string","","","","" +"processedDataType","A label used for file annotations to provide a brief description of the processed data file.","barcode counts, differential expression results, epigenomic peaks, gene counts","","False","","","list like error","string_list","","","","" +"eventCount","The total number of events detected and captured in the corresponding FCS file. In cytometry, an event corresponds to particles detected and measured by the instrument. This number corresponds to the number of rows in the FCS file and is expected to be a whole integer number.","","","False","","","int error","integer","","","","" +"notes","Please use this attribute to capture pertinent details not otherwise captured. 
This is an unstructured text entry, please be concise.","","","False","","","","string","","","","" +"species","The genus species of sample or subject origin.","Homo sapiens","","True","","","","string","","","","" +"treatment","A short descriptive label indicating what experimental treatments have been applied to a biospecimen before data capture. Please include the word 'control' to indicate specific treatments that are intended to serve as a control group.","","","True","","","","string","","","","" +"treatmentTimepoint","Where applicable, specify the timepoint relative to treatment to distinguish different sets of samples that have undergone the same treatment but for different lengths of time. REQUIRED: please specify the unit of time, e.g., secs, mins, hrs, days, etc.","","","False","","","","string","","","","" +"slideID","A distinct label or name, unique within an experiment, assigned to an imaging slide.","","","True","","","","string","","","","" +"altSampleID","An alternate identifier for a sample. With some assays there can be default or alternate sample identifiers entered into the data collection software. If you will be uploading data collected with an alternate identifier please provide that here. In some cases, the ARK BDM team will use this field to capture original but out-dated identifiers for samples.","","","False","","","","string","","","","" +"sampleCollectionBatch","A label indicating batching of sample collection or experiment execution that occurs prior to data collection.","","","False","","","","string","","","","" +"associatedAccession","This is a File and Dataset annotation attribute indicating additional accessions (i.e., unique identifiers) associated with the data when the data has also been submitted to or can be found in other repositories such as GEO, SRA, dbGaP, etc.","","","False","","","list like error","string_list","","","","" +"percentCellViability","A measure of the proportion of viable cells within a cell suspension. 
Scale is 0-100.","","","True","","","inRange 50 100 error","integer","","50","100","" +"sequencingSaturation","A measure of the fraction of library complexity that was sequenced in a library. This metric quantifies the fraction of reads originating from an already-observed UMI. More specifically, this is the fraction of confidently mapped, valid cell-barcode, valid UMI reads that are non-unique. Scale is 0-1.","","","False","","https://kb.10xgenomics.com/hc/en-us/articles/115005062366-What-is-sequencing-saturation","inRange 0 1 error","number","","0","1","" +"publicationSynID","The synID of the corresponding Synapse entity that stores metadata about the publication. This is used to differentiate publication-specific files, often consisting of level 4 processed data and expanded subject metadata, in a publication dataset that also includes raw or minimally processed files from experimental datasets. This provides an easy way to distinguish and select for the publication-specific data from which the research findings were derived. When this attribute is used to annotate a Dataset it serves as a way to directly link the Dataset entity with the publication metadata stored in Synapse.","","","False","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"associatedDataset","The synID of a Dataset entity. This serves to link other Synapse entities to Dataset entities. When used to annotate a publication Dataset this attribute should include the synID for an experimental Datasets from which the publication data was derived. Multiple synID can be specified using a comma-delimited list.","","","False","","","list like::regex search ^syn[0-9]{8} error","string_list","","","","^syn[0-9]{8}" +"custom10xProbeSetSynID","If custom modified probe sets were used for collecting 10x Chromium Flex scRNA-seq data, then the probe reference files should be grouped as a zip or tar archive and uploaded as metadata. 
This attribute links the experimental data back to the probe set archive file via a synapse ID.","","","True","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"targetPanelSynID","In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. This attribute links experimental data files to the target panel metadata via the synapse ID of that file.","","","True","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"datasetDescription","A Dataset-specific attribute specifying the synID of the folder that contains a wiki write-up of the dataset description. This wiki content will be surfaced on the ARK Portal frontend site.","","","True","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"DOID","Disease ontology identifier associated with `diagnosis`. Attribute values are applied by ARK Portal data managers.","","","True","","","regex search ^DOID error","string","","","","^DOID" +"BRENDA","BRENDA Tissue and Enzyme Source Ontology ID (BTO) corresponding to the `biospecimenType`. 
Attribute values are applied by ARK Portal data managers.","","","True","","https://bioportal.bioontology.org/ontologies/BTO","regex search ^BTO error","string","","","","^BTO" +"year","Year (YYYY) in which the paper was published.","","","True","","","regex search [1-2][0-9]{3} error","string","","","","[1-2][0-9]{3}" +"PMID","PubMed(R) Identifier","","","True","","","regex search ^PMID error","string","","","","^PMID" +"ImmPortAccession","Accession to corresponding information in ImmPort.","","","False","","","regex search ^SDY error","string","","","","^SDY" +"cellOntologyID","Cell Ontology CL identifier that best describes a biospecimen used to generate data, e.g., CL:0000084 for T cell.","","","False","","","regex search ^CL: error","string","","","","^CL:" +"PMCID","Pubmed Central Identifier, formatted as a compact URI string that will automatically resolve into a URL based on the Synapse bioregistry; e.g., pmc:PMCxxxxxxxx","","","False","","","regex search ^pmc:PMC[0-9]{8} error","string","","","","^pmc:PMC[0-9]{8}" diff --git a/model_contexts/fastq/ark.fastq_context.csv b/model_contexts/fastq/ark.fastq_context.csv index a73ac3db..f8c033a9 100644 --- a/model_contexts/fastq/ark.fastq_context.csv +++ b/model_contexts/fastq/ark.fastq_context.csv @@ -1,13 +1,13 @@ -"Attribute","Description","Valid Values","DependsOn","Properties","Required","Parent","DependsOn Component","Source","Validation Rules","columnType" -"Fastq File Annotation Template","A template outlining metadata to be collected and applied to fastq file entities as annotations in Synapse.","","Component, fileFormat, specimenModality, readLength, assay","","","","","","","" -"BDM Fastq File Annotations","A template outlining metadata to be collected and applied to fastq file entities as annotations in Synapse by BDM team as part of data release process.","","Component, fileFormat, resourceType, assay, specimenModality, readLength, parentBiospecimenID, biospecimenType, biospecimenSubtype, dataType, 
dataSubtype, diagnosis, libraryPrepMethod, nucleicAcidSource, platform, program, project, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","" -"multispecimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","libraryID","","","","","","","" -"single specimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","biospecimenID, individualID","","","","","","","" -"resourceType","","experimental data","","","TRUE","","","","","string" -"fileFormat","","fastq","","","TRUE","","","","","string" -"dataLevel","","1","","","TRUE","","","","","string" -"feature barcode sequencing","Is a valid value of `assay` that triggers conditional dependencies for additional attributes.","","targetPanel, targetPanelSynID, targetPanelSize","","","","","","","" -"assay","The technology used to generate the data in this file. Select all assays that apply. e.g., the GEX fastq files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.","scRNASeq, CITESeq, snRNASeq, snATACSeq, RNASeq, ASAPSeq, VDJSeq, scVDJSeq, feature barcode sequencing, WES, WGS","","","TRUE","","","","list like error","string" -"platform","The specific version (manufacturer, model, etc.) of a technology that is used to carry out a laboratory or computational experiment. Specify where applicable for experimental data files, else enter 'none'. 
In most cases only a single label is expected, however multiple selections can be provided in comma-delimited list where applicable e.g., for 10x Genomics fastq files please specify both the 10x instrument and the sequencing platform.","Illumina NovaSeq 6000, Illumina HiSeq X Ten, Illumina NextSeq 500, Illumina NovaSeq X, Illumina HiSeq 2500, Chromium X, Chromium Next GEM Chip M, GEM-X Flex Gene Expression Chip, GEM-X OCM 5' Chip, Chromium Next GEM Chip G, Chromium GEM-X Single Cell 3' Chip v4, Chromium Xo, Chromium Next GEM Chip H, Chromium Controller, Chromium Next GEM Chip K, Chromium Next GEM Chip Q, Chromium iX, unknown","","","TRUE","","","","list like error","string" -"AMP RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","" -"AMP AIM","Accelerating Medicines Partnership Autoimmune and Immune-Mediated Diseases. 
Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","" +"Attribute","Description","Valid Values","DependsOn","Required","Parent","Source","Validation Rules","columnType","Format","Minimum","Maximum","Pattern" +"Fastq File Annotation Template","A template outlining metadata to be collected and applied to fastq file entities as annotations in Synapse.","","Component, fileFormat, specimenModality, readLength, assay","","","","","","","","","" +"BDM Fastq File Annotations","A template outlining metadata to be collected and applied to fastq file entities as annotations in Synapse by BDM team as part of data release process.","","Component, fileFormat, resourceType, assay, specimenModality, readLength, parentBiospecimenID, biospecimenType, biospecimenSubtype, dataType, dataSubtype, diagnosis, libraryPrepMethod, nucleicAcidSource, platform, program, project, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","","","" +"multispecimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","libraryID","","","","","","","","","" +"single specimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","biospecimenID, individualID","","","","","","","","","" +"resourceType","","experimental data","","TRUE","","","","string","","","","" +"fileFormat","","fastq","","TRUE","","","","string","","","","" +"dataLevel","","1","","TRUE","","","","string","","","","" +"feature barcode sequencing","Is a valid value of `assay` that triggers conditional dependencies for additional attributes.","","targetPanel, targetPanelSynID, targetPanelSize","","","","","","","","","" +"assay","The technology used to generate the data in this file. Select all assays that apply. 
e.g., the GEX fastq files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.","scRNASeq, CITESeq, snRNASeq, snATACSeq, RNASeq, ASAPSeq, VDJSeq, scVDJSeq, feature barcode sequencing, WES, WGS","","TRUE","","","list like error","string_list","","","","" +"platform","The specific version (manufacturer, model, etc.) of a technology that is used to carry out a laboratory or computational experiment. Specify where applicable for experimental data files, else enter 'none'. In most cases only a single label is expected, however multiple selections can be provided in comma-delimited list where applicable e.g., for 10x Genomics fastq files please specify both the 10x instrument and the sequencing platform.","Illumina NovaSeq 6000, Illumina HiSeq X Ten, Illumina NextSeq 500, Illumina NovaSeq X, Illumina HiSeq 2500, Chromium X, Chromium Next GEM Chip M, GEM-X Flex Gene Expression Chip, GEM-X OCM 5' Chip, Chromium Next GEM Chip G, Chromium GEM-X Single Cell 3' Chip v4, Chromium Xo, Chromium Next GEM Chip H, Chromium Controller, Chromium Next GEM Chip K, Chromium Next GEM Chip Q, Chromium iX, unknown","","TRUE","","","list like error","string_list","","","","" +"AMP RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","","","" +"AMP AIM","Accelerating Medicines Partnership Autoimmune and Immune-Mediated Diseases. 
Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","","","" diff --git a/model_contexts/fastq/ark.fastq_model.csv b/model_contexts/fastq/ark.fastq_model.csv index e3b69375..5d93dca8 100644 --- a/model_contexts/fastq/ark.fastq_model.csv +++ b/model_contexts/fastq/ark.fastq_model.csv @@ -1,116 +1,116 @@ -"Attribute","Description","Valid Values","DependsOn","Properties","Required","Parent","DependsOn Component","Source","Validation Rules","columnType" -"Fastq File Annotation Template","A template outlining metadata to be collected and applied to fastq file entities as annotations in Synapse.","","Component, fileFormat, specimenModality, readLength, assay","","","","","","","" -"BDM Fastq File Annotations","A template outlining metadata to be collected and applied to fastq file entities as annotations in Synapse by BDM team as part of data release process.","","Component, fileFormat, resourceType, assay, specimenModality, readLength, parentBiospecimenID, biospecimenType, biospecimenSubtype, dataType, dataSubtype, diagnosis, libraryPrepMethod, nucleicAcidSource, platform, program, project, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","" -"multispecimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","libraryID","","","","","","","" -"single specimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","biospecimenID, individualID","","","","","","","" -"resourceType","High-level classification of the file content","experimental data","","","TRUE","","","","","string" -"fileFormat","Standard file format name or file extension","fastq","","","TRUE","","","","","string" -"dataLevel","Level of data processing applied to file. 
Levels refer to pre-defined standards of processing for the given assay.","1","","","TRUE","","","","","string" -"feature barcode sequencing","Is a valid value of `assay` that triggers conditional dependencies for additional attributes.","","targetPanel, targetPanelSynID, targetPanelSize","","","","","","","" -"assay","The technology used to generate the data in this file. Select all assays that apply. e.g., the GEX fastq files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.","scRNASeq, CITESeq, snRNASeq, snATACSeq, RNASeq, ASAPSeq, VDJSeq, scVDJSeq, feature barcode sequencing, WES, WGS","","","TRUE","","","","list like error","string" -"platform","The specific version (manufacturer, model, etc.) of a technology that is used to carry out a laboratory or computational experiment. Specify where applicable for experimental data files, else enter 'none'. In most cases only a single label is expected, however multiple selections can be provided in comma-delimited list where applicable e.g., for 10x Genomics fastq files please specify both the 10x instrument and the sequencing platform.","Illumina NovaSeq 6000, Illumina HiSeq X Ten, Illumina NextSeq 500, Illumina NovaSeq X, Illumina HiSeq 2500, Chromium X, Chromium Next GEM Chip M, GEM-X Flex Gene Expression Chip, GEM-X OCM 5' Chip, Chromium Next GEM Chip G, Chromium GEM-X Single Cell 3' Chip v4, Chromium Xo, Chromium Next GEM Chip H, Chromium Controller, Chromium Next GEM Chip K, Chromium Next GEM Chip Q, Chromium iX, unknown","","","TRUE","","","","list like error","string" -"AMP RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","" -"AMP AIM","Accelerating Medicines Partnership Autoimmune and Immune-Mediated Diseases. 
Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","" -"Component","A high-level attribute for grouping attributes into templates.","","","","True","","","","","string" -"publicationSynID","The synID of the corresponding Synapse entity that stores metadata about the publication. This is used to differentiate publication-specific files, often consisting of level 4 processed data and expanded subject metadata, in a publication dataset that also includes raw or minimally processed files from experimental datasets. This provides an easy way to distinguish and select for the publication-specific data from which the research findings were derived. When this attribute is used to annotate a Dataset it serves as a way to directly link the Dataset entity with the publication metadata stored in Synapse.","","","","False","","","","regex search ^syn[0-9]{8} error","string" -"associatedDataset","The synID of a Dataset entity. This serves to link other Synapse entities to Dataset entities. When used to annotate a publication Dataset this attribute should include the synID for an experimental Datasets from which the publication data was derived. 
Multiple synID can be specified using a comma-delimited list.","","","","False","","","","list like::regex search ^syn[0-9]{8} error","string" -"program","Name of the funding program that supported the generation of data and associated files","AMP AIM, AMP RA/SLE, Community Contribution","","","True","","","","","string" -"project","A sub-level attribute of `program` specifying a research initiative working to investigate particular hypotheses.","AIM for RA, ELLIPSS, LOCKIT, RA, SLE, STAMP, UMass V-CoRT","","","True","","","","list like error","string" -"programPhase","A label noting which AMP RA/SLE program phase generated the data.","I, II","","","True","","","","list like error","string" -"dataType","High-level classification of the type of data contained in the file, loosely related to the experimental method or biological entity that is being profiled. Select all that apply using a comma-delimited list, though in most cases only a single label is expected. For multimodal datasets with concomitant profiling of biospecimen include 'multimodal'.","cytometry, epigenomics, genomics, histology, immune repertoire profiling, immunostaining, lipidomics, metabolomics, microbiome, multimodal, proteomics, transcriptomics","","","True","","","","list like error","string" -"dataSubtype","General classification to differentiate between omics profiling modalities. If N/A please select 'none'. Multiple selections can be provided in a comma-delimited list, however this is largely only expected in the context of Datasets and files that contain integrated experimental data spanning multiple types.","bulk, none, pseudobulk, single-cell, single-nucleus, spatial","","","True","","","","list like error","string" -"10xProbeSetReference","Name of probe set used in 10x Chromium Flex, Xenium, or Visium data. 
If custom modified probe set was used the probe reference should be included as metadata accompanying the experimental data files.","Flex Human Transcriptome Probe Set v1.0.1, Flex Human Transcriptome Probe Set v1.1.0, Visium Human Transcriptome v1, Visium Human Transcriptome v2, custom probe set","","","True","","","","","string" -"custom10xProbeSetSynID","If custom modified probe sets were used for collecting 10x Chromium Flex scRNA-seq data, then the probe reference files should be grouped as a zip or tar archive and uploaded as metadata. This attribute links the experimental data back to the probe set archive file via a synapse ID.","","","","True","","","","regex search ^syn[0-9]{8} error","string" -"specimenModality","Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens","multispecimen, single specimen, unknown","","","True","","","","","string" -"individualID","Unique identifier assigned to each study participant. For multi-specimen data files provide all IDs in a comma-separated list.","","","","True","","","","#ClinicalMetadataTemplate unique error^^#BiospecimenMetadataTemplate str^^list like error","string" -"biospecimenID","A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.","","","","True","","","","#BiospecimenMetadataTemplate unique error^^list like error","string" -"parentBiospecimenID","The biospecimenID associated with the originating biospecimen for derived or child biospecimens.","","","","False","","","","","string" -"biospecimenType","A label indicating the biological material collected for experimentation and data collection. 
Where applicable, provide all types in a comma-separated list.","PBMCs, cell line, fibroblast-like synoviocyte, kidney biopsy, none, plasma, primary cell culture, saliva, salivary gland, serum, skin biopsy, skin swab, stool, suction blister cells, suction blister fluid, synovial fluid, synovial tissue, total leukocytes, urine, uvea, whole blood","","","True","","","","#BiospecimenMetadataTemplate str^^#InVitroBiospecimenMetadataTemplate^^list like error","string" -"primaryCellSource","A label indicating the biological source material from which a primary cell culture was derived.","PBMCs, kidney, pannus-derived dermis, pannus-derived epidermis, salivary gland, synovial tissue, total leukocytes, urine, uvea, whole blood","","","True","","","","","string" -"metadataType","A label further classifying the content of metadata resource.","assay, biospecimen, cell coordinates, data dictionary, file manifest, medication, other, phenotype, protocol, single-cell metadata, target panel, template, tissue microarray map, tissue multiarray map, user manual","","","True","","","","","string" -"codingLanguage","The coding, aka programming, language(s) contained in the file marked with `resourceType = code`. 
Select all that apply.","C, C#, C++, Fortan, Java, Julia, Matlab, Python, R, Ruby, SAS","","","True","","","","","string" -"visitID","Ordinal ID distinguishing different patient visits.","","","","True","","","","","string" -"softwareAndVersion","Relevant software and version used to generate the data file.","BD FACSDiva 8.0.1, Cell Ranger 9.0.1, Cell Ranger ATAC v1.1.0, Cell Ranger v3.0.0, Cell Ranger v3.0.1, Cell Ranger v3.0.2, Cell Ranger v3.1.0, Cell Ranger v4.0.0, Cell Ranger v5.0.0, Cell Ranger v5.0.1, Cell Ranger v6.0.0, Cell Ranger v6.0.1, Cell Ranger v6.0.2, Cell Ranger v6.1.0, Cell Ranger v6.1.1, Cell Ranger v6.1.2, Cell Ranger v7.0.0, Cell Ranger v7.0.1, Cell Ranger v7.1.0, Cell Ranger v7.2.0, Cell Ranger v8.0.0, Cell Ranger v8.0.1, Cell Ranger v9.0.0, Space Ranger 3.0.0, Space Ranger 3.0.1, Space Ranger 3.1.0, Space Ranger 3.1.1, Space Ranger 3.1.2, Space Ranger 3.1.3, demuxlet","","","False","","","","","string" -"targetPanel","A unique or established human-readable name assigned to the panel of targets profiled in the experiment. For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.","","","","True","","","","","string" -"targetPanelSynID","In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. This attribute links experimental data files to the target panel metadata via the synapse ID of that file.","","","","True","","","","regex search ^syn[0-9]{8} error","string" -"targetPanelSize","The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. 
The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).","","","","False","","","","int error","integer" -"nucleicAcidSource","The source of the nucleic acid used as input for sequencing library fragments. Select all that apply, though in most cases only a single label is expected.","BCR mRNA, CRISPR protospacer feature barcode, TCR mRNA, Tn5-accessible gDNA, antigen capture barcode, gDNA, globin-depleted RNA, intracellular protein feature barcode, multiplexing oligo, poly(A) RNA, rRNA-depleted RNA, surface protein feature barcode","","","True","","","","list like error","string" -"biospecimenSubtype","Biospecimen status before sample is processed into a scRNA-seq library. Several scRNA-seq technologies support a variety of sample processing methods which can introduces sources of technical variation.","FFPE tissue, PFA-fixed tissue, cell or tissue lysate, cell suspension, flow-sorted cells, fresh tissue, frozen tissue, nuclei suspension, supernatant","","","False","","","","","string" -"cellRangerOutput","10x Genomics Cell Ranger software output several different counts results and formats, some with different processing applied. This label distinguishes between these types and is particularly helpful when multiple files are uploaded with the sample name, e.g., barcodes.tsv.gz","Not Applicable, filtered MEX, filtered_feature_bc_matrix, filtered_peak_bc_matrix, raw MEX, raw_feature_bc_matrix, raw_peak_bc_matrix","","","True","","","","","string" -"alignmentReference","The genomic/transcriptomic reference used for performing read alignment against.","10x Cell Ranger Human GRCh38 2020-A, 10x Cell Ranger Human GRCh38 2024-A, GRCh38, modified GRCh38, unknown, vdj_GRCh38_alts_ensembl-4.0.0","","","True","","","","","string" -"libraryPrepMethod","Sequencing library preparation method or kit used to create the library. 
If no commercially available kit was used, please select 'in-house library prep'.","10x Chromium Fixed RNA Human Transcriptome, 10x Chromium GEM-X Single Cell 3' v4, 10x Chromium GEM-X Single Cell 5' v3, 10x Chromium Next GEM Single Cell 3', 10x Chromium Next GEM Single Cell 3' 3.1, 10x Chromium Next GEM Single Cell 5' v1.1, 10x Chromium Next GEM Single Cell 5' v2, 10x Chromium Next GEM Single Cell ATAC v2, 10x Chromium Single Cell Human BCR, 10x Chromium Single Cell Human TCR, 10x GEM-X Flex Gene Expression Human, 10x GEM-X Universal 5' Gene Expression v3, CEL-Seq2, Chromium Next GEM Single Cell ATAC v1.1, Fluidigm C1 HT, NEBNext Human Immune Sequencing Kit, NEBNext Ultra II Directional RNA Library, Nextera XT, Nextera XT DNA, QIAseq miRNA Library, SMART-Seq Human BCR with UMI, SMART-Seq Human TCR with UMI, SMART-Seq v4 Ultra Low Input RNA, SMARTer Stranded Total RNA v2, Takara Human BCR profiling for Illumina, Takara Human TCR profiling for Illumina, Takara Human TCRv2 profiling for Illumina, Takara Human scTCR profiling for Illumina, TruSeq Stranded mRNA, custom DASH-treatment, in-house library prep","","","True","","","","","string" -"datasetType","High-level classification of dataset entity distinguishing between datasets compiled for a specific publication or as a general data resource.","experimental, publication","","","True","","","","","string" -"acknowledgmentStatement","A Dataset-specific attribute specifying the path to the wiki subpage within the ARK Portal - backend project that contains the acknowledgement statement that must be included in publications using data from the given dataset as a stipulation of the conditions of use.","syn26710600/wiki/619685","","","True","","","","","string" -"datasetDescription","A Dataset-specific attribute specifying the synID of the folder that contains a wiki write-up of the dataset description. 
This wiki content will be surfaced on the ARK Portal frontend site.","","","","True","","","","regex search ^syn[0-9]{8} error","string" -"ARKRelease","A Dataset-specific attribute specifying the ARK Portal release in which this dataset was first made available to the public.","1.0, 2.0, 2024.06.R1, 2024.07.R1, 2024.08.R1, 2024.09.R1, 2024.10.R1, 2024.12.R1, 2025.01.R1, 2025.02.R1, 2025.03.R1, 2025.04.R1, 2025.05.R1, 2025.06.R1, 2025.07.R1, 2025.08.R1, 2025.09.R1, 2025.10.R1, 2025.11.R1, 2025.12.R1","","","True","","","","","string" -"RObjectClass","Rds files store R objects, one per file. This label details the class of the R object saved to the Rds file or other similar file types.","ROCR prediction.object, Seurat object, SummarizedExperiment, Symphony reference, data.frame, list, matrix, sparse matrix, vector","","","False","","","","","string" -"diagnosis","A high-level classifier indicating the disease status of an individual.","At-Risk RA, Not Applicable, OA, RA, SLE, Sjogren's disease, control, cutaneous lupus erythematosus, dermatomyositis, discoid lupus erythematosus, lupus nephritis, psoriasis, psoriatic arthritis, scleroderma, unknown, vitiligo","","","True","","","","list like error","string" -"DOID","Disease ontology identifier associated with `diagnosis`. Attribute values are applied by ARK Portal data managers.","","","","True","","","","regex search ^DOID error","string" -"libraryID","A library label or name, unique within an experiment, used to distinguish sequencing libraries.","","","","True","","","","","string" -"associatedCodeURL","A URL to the repository where associated code is available.","","","","False","","","","","string" -"FMAID","Functional Model of Anatomy ontology ID corresponding to the anatomical site and origin of the biospecimen. 
This attribute is assigned by ARK Portal data managers.","","","","True","","","https://bioportal.bioontology.org/ontologies/FMA/?p=summary","int error","integer" -"BRENDA","BRENDA Tissue and Enzyme Source Ontology ID (BTO) corresponding to the `biospecimenType`. Attribute values are applied by ARK Portal data managers.","","","","True","","","https://bioportal.bioontology.org/ontologies/BTO","regex search ^BTO error","string" -"skinSiteStatus","Disease manifestation status of skin biospecimen.","healthy control, lesional, lesional proximal, non-lesional","","","True","","","","","string" -"salivaCollectionProcedure","Classification of saliva collection procedure. This is a conditionally dependent attribute triggered for saliva `biospecimenType`.","stimulated, unstimulated","","","True","","","","","string" -"synovialCollectionProcedure","Classification of procedure for synovial tissue collection.","arthroplasty, biopsy, synovectomy, unknown","","","True","","","","","string" -"publicationType","General classification of publication.","correction, peer-reviewed, pre-print","","","True","","","","","string" -"title","Title of the publication.","","","","True","","","","","string" -"journal","Journal in which the publication was released","","","","True","","","","","string" -"year","Year (YYYY) in which the paper was published.","","","","True","","","","regex search [1-2][0-9]{3} error","string" -"publicationDate","The publication date extracted from PubMed database","","","","True","","","","","string" -"PMID","PubMed(R) Identifier","","","","True","","","","regex search ^PMID error","string" -"DOI","Digital object identifier","","","","True","","","","","string" -"anatomicalSite","The anatomical site, i.e., location on or within the body, from which the biospecimen was collected. 
This attribute is required depending on the value selected for biospecimenType.","left 2nd MCP joint, left ankle joint, left hip joint, left knee joint, left wrist joint, other site, right 1st MTP joint, right 2nd MCP joint, right 2nd MTP joint, right 3rd MCP joint, right ankle joint, right hip joint, right knee joint, right wrist joint, unknown","","","True","","","","","string" -"vitiligoPattern","A high-level classification of vitiligo based on the distribution and patterning of lesions across the body.","mixed, non-segmental, segmental, unclassified","","","False","","","","","string" -"vitiligoPhenotype","Classification of vitiligo lesions which correlate with autoimmune activity and result in specific skin and depigmentation manifestations at the lesion site. If N/A please select 'none'. Multiple selections can be provided in a comma-delimited list.","active, confetti, inflammatory, none, trichrome","","","True","","","","list like error","string" -"psoriasisType","General type classification of psoriasis disease manifestation.","erythrodermic, guttate, inverse, plaque, pustular","","","False","","","","","string" -"ageDiagnosis","Age at which subject was diagnosed with `diagnosis`. If providing this value be sure the unit matches that used for `age`.","","","","False","","","","num error","number" -"age","Age at which subject was enrolled in study or age at corresponding visit and data collection event. 
If value unknown, enter '-1'.","","","","True","","","","num error","number" -"ageUnits","The unit of measure used for `ageEnrollment` and `ageDiagnosis`","months, years","","","True","","","","","string" -"sex","A textual description of a person's sex at birth.","female, intersex, male, unknown","","","True","","","","","string" -"height","Standing height of subject.","","","","True","","","","num error","number" -"race","A textual description of a person's race.","American Indian or Alaska Native, Asian, Black or African American, Hispanic, Mixed Race, Native Hawaiian or Other Pacific Islander, White, other, unknown","","","False","","","","","string" -"ethnicity","The ethnicity of a person.","Hispanic or Latino, Not Hispanic or Latino, unknown","","","False","","","","","string" -"heightUnits","Unit of measure of value provided for `height`.","centimeters, feet, inches, meters","","","True","","","","","string" -"weight","Weight of subject. If value unknown, enter '-1'.","","","","True","","","","num error","number" -"weightUnits","Abbreviated unit of measure of value provided for `weight`.","g, kg, lb, oz","","","True","","","","","string" -"comorbidities","Any diseases or medical condition that is simultaneously present in addition to `diagnosis`.","Hashimoto's Thyroiditis, autoimmune thyroid disease, cardiovascular disease, diabetes, inflammatory bowel disease, multiple sclerosis, other, psoriasis, psoriatic arthritis, pulmonary disease, rheumatoid arthritis, systemic lupus erythematosus","","","False","","","","list like error","string" -"diabetesType","Type of diabetes mellitus.","gestational, type 1, type 2, unknown","","","False","","","","","string" -"VIDA","Vitiligo Disease Activity Score, a six-point scale that evaluates the activity of vitiligo","","","","False","","","","","string" -"VASI","Total body Vitiligo Area Severity Index, a measure of the percentage of vitiligo involvement across the body calculated in terms of hand 
units.","","","","False","","","","","string" -"VETI","Vitiligo Extent Tensity Index measures the extent of vitiligo by a numerical score and combines analysis of extensity and severity of vitiligo to produce a constant and reproducible number.","","","","False","","","https://doi.org/10.5826/dpc.0404a18","num error","number" -"PASI","Psoriasis Area and Severity Index, is a measurement of the discoloration, thickness, scaling, and coverage of psoriasis plaques.","","","","False","","","","","string" -"CDASI","Cutaneous Dermatomyositis Disease Area and Severity Index, is a measurement used to characterize cutaneous dermatomyositis severity.","","","","False","","","","","string" -"percentCellViability","A measure of the proportion of viable cells within a cell suspension. Scale is 0-100.","","","","True","","","","inRange 50 100 error","integer" -"inputCellCount","An estimate of the number of cells expected to be sequenced in a library. Software that process single-cell sequencing data often include options for users to specify this value to improve processing results.","","","","True","","","","int error","integer" -"totalReads","Total number of reads sequenced from the library.","","","","True","","","","int error","integer" -"sequencingSaturation","A measure of the fraction of library complexity that was sequenced in a library. This metric quantifies the fraction of reads originating from an already-observed UMI. More specifically, this is the fraction of confidently mapped, valid cell-barcode, valid UMI reads that are non-unique. 
Scale is 0-1.","","","","False","","","https://kb.10xgenomics.com/hc/en-us/articles/115005062366-What-is-sequencing-saturation","inRange 0 1 error","number" -"ImmPortAccession","Accession to corresponding information in ImmPort.","","","","False","","","","regex search ^SDY error","string" -"FACSPopulation","A description of the marker gating strategy used to derive the population cells with FACS.","","","","True","","","","","string" -"cellOntologyID","Cell Ontology CL identifier that best describes a biopsecimen used to generate data, e.g., CL:0000084 for T cell.","","","","False","","","","regex search ^CL: error","string" -"cellType","The cell type name from Cell Ontology for the corresponding CL identifier.","","","","False","","","","","string" -"userDefinedCellType","User-defined label of a cell type. Contributors are provided this option to use preferred or custom cell type labels that may vary from options and standards set by Cell Ontology.","","","","False","","","","","string" -"krennInflammatory","A standardized, semi-quantitative measure of the degree of inflammatory infiltrate in synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"krennLining","A standardized, semi-quantitative measure of the degree of hyperplasia/enlargement of the synovial lining layer of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. 
If value unknown, use '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"krennStroma","A standardized, semi-quantitative measure of stromal cell density of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"krennSynovitisScore","The Krenn Synovitis Score (KSS) is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"datasetStatus","A categorical label indicating the status of an ARK Portal dataset. 
This is applied to improve downstream management of datasets as well as various ETL workflows.","deprecated, released, test, under peer review, unreleased","","","True","","","","","string" -"sampleProcessingBatch","A label indicating batching of sample processing or preparation that occurs prior to data collection.","","","","False","","","","","string" -"dataCollectionBatch","A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.","","","","False","","","","","string" -"metadataStandards","Metadata standards used to generate the metadata","ARK data model, user-defined","","","False","","","","","string" -"skinSunExposure","For skin-based biospecimen, this attribute indicates whether the sample was collected from an anatomical site that does or does not receive routine sun exposure.","not sun exposed, sun exposed","","","False","","","","","string" -"sequencingSpikeIn","Pre-made sequencing libraries may be added to your sequencing run to improve sequencing results. If such a 'spike-in' library was used, this attribute specifies the name of the library and the percentage of the spike-in library used in the sequencing run.","","","","False","","","","","string" -"librarySpikeIn","Pre-made collections of nucleic acid fragments can be added to samples during the library construction process to improve downstream quantification and statistical analyses. If such a 'spike-in' was used, this attribute specifies the name and manufacturer of the spike-in.","","","","False","","","","","string" -"readLength","The number of base pairs (bp) sequenced for reads in a fastq file.","","","","False","","","","int error","integer" -"plateID","An identifier assigned to a multi-well plate. Certain data types and assays profile samples using multi-well plates. 
Knowing which samples were profiled on each plate is important for establishing sample provenance, finding the right data files for a specific set of samples, as well as downstream exploratory data analysis and QC work particularly regarding identification and correction of batch effects.","","","","False","","","","","string" -"PMCID","Pubmed Central Identifier, formatted as a compact URI string that will automatically resolve into a URL based on the Synapse bioregistry; e.g., pmc:PMCxxxxxxxx","","","","False","","","","regex search ^pmc:PMC[0-9]{8} error","string" -"processedDataType","A label used for file annotations to provide a brief description of the processed data file.","barcode counts, differential expression results, epigenomic peaks, gene counts","","","False","","","","list like error","string" -"eventCount","The total number of events detected and captured in the corresponding FCS file. In cytometry, an event corresponds to particles detected and measured by the instrument. This number corresponds to the number of rows in the FCS file and is expected to be a whole integer number.","","","","False","","","","int error","integer" -"notes","Please use this attribute to captured pertinent details not otherwise captured. This is an unstructured text entry, please be concise.","","","","False","","","","","string" -"species","The genus species of sample or subject origin.","Homo sapiens","","","True","","","","","string" -"treatment","A short descriptive label indicating what experimental treatments have been applied to a biospecimen before data capture. Please include the word 'control' to indicate specific treatments that are intended to serve as a control group.","","","","True","","","","","string" -"treatmentTimepoint","Where applicable, specify the timepoint relative to treatment to distinguish different sets of samples that have undergone the same treatment but for different lengths of time. 
REQUIRED: please specify the unit of time, e.g., secs, mins, hrs, days, etc.","","","","False","","","","","string" -"slideID","A distinct label or name, unique within an experiment, assigned to an imaging slides.","","","","True","","","","","string" -"altSampleID","An alternate identifier for a sample. With some assays there can be default or alternate sample identifiers entered into the data collection software. If you will be uploading data collected with an alternate identifier please provide that here. In some cases, the ARK BDM team will use this field to capture original but out-dated identifiers for samples.","","","","False","","","","","string" -"sampleCollectionBatch","A label indicating batching of sample collection or experiment execution that occurs prior to data collection.","","","","False","","","","","string" -"associatedAccession","This is a File and Dataset annotation attribute indicating additional accessions (i.e., unique identifiers) associated with the data when the data has also been submitted to or can be found in other repositories such as GEO, SRA, dbGaP, etc.","","","","False","","","","list like error","string" +"Attribute","Description","Valid Values","DependsOn","Required","Parent","Source","Validation Rules","columnType","Format","Minimum","Maximum","Pattern" +"Fastq File Annotation Template","A template outlining metadata to be collected and applied to fastq file entities as annotations in Synapse.","","Component, fileFormat, specimenModality, readLength, assay","","","","","","","","","" +"BDM Fastq File Annotations","A template outlining metadata to be collected and applied to fastq file entities as annotations in Synapse by BDM team as part of data release process.","","Component, fileFormat, resourceType, assay, specimenModality, readLength, parentBiospecimenID, biospecimenType, biospecimenSubtype, dataType, dataSubtype, diagnosis, libraryPrepMethod, nucleicAcidSource, platform, program, project, species, primaryCellSource, 
cellType, userDefinedCellType","","","","","","","","","" +"multispecimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","libraryID","","","","","","","","","" +"single specimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","biospecimenID, individualID","","","","","","","","","" +"resourceType","High-level classification of the file content","experimental data","","TRUE","","","","string","","","","" +"fileFormat","Standard file format name or file extension","fastq","","TRUE","","","","string","","","","" +"dataLevel","Level of data processing applied to file. Levels refer to pre-defined standards of processing for the given assay.","1","","TRUE","","","","string","","","","" +"feature barcode sequencing","Is a valid value of `assay` that triggers conditional dependencies for additional attributes.","","targetPanel, targetPanelSynID, targetPanelSize","","","","","","","","","" +"assay","The technology used to generate the data in this file. Select all assays that apply. e.g., the GEX fastq files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.","scRNASeq, CITESeq, snRNASeq, snATACSeq, RNASeq, ASAPSeq, VDJSeq, scVDJSeq, feature barcode sequencing, WES, WGS","","TRUE","","","list like error","string_list","","","","" +"platform","The specific version (manufacturer, model, etc.) of a technology that is used to carry out a laboratory or computational experiment. Specify where applicable for experimental data files, else enter 'none'. 
In most cases only a single label is expected, however multiple selections can be provided in a comma-delimited list where applicable, e.g., for 10x Genomics fastq files please specify both the 10x instrument and the sequencing platform.","Illumina NovaSeq 6000, Illumina HiSeq X Ten, Illumina NextSeq 500, Illumina NovaSeq X, Illumina HiSeq 2500, Chromium X, Chromium Next GEM Chip M, GEM-X Flex Gene Expression Chip, GEM-X OCM 5' Chip, Chromium Next GEM Chip G, Chromium GEM-X Single Cell 3' Chip v4, Chromium Xo, Chromium Next GEM Chip H, Chromium Controller, Chromium Next GEM Chip K, Chromium Next GEM Chip Q, Chromium iX, unknown","","TRUE","","","list like error","string_list","","","","" +"AMP RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","","","" +"AMP AIM","Accelerating Medicines Partnership Autoimmune and Immune-Mediated Diseases. 
Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","","","" +"Component","A high-level attribute for grouping attributes into templates.","","","True","","","","string","","","","" +"program","Name of the funding program that supported the generation of data and associated files","AMP AIM, AMP RA/SLE, Community Contribution","","True","","","","string","","","","" +"project","A sub-level attribute of `program` specifying a research initiative working to investigate particular hypotheses.","AIM for RA, ELLIPSS, LOCKIT, RA, SLE, STAMP, UMass V-CoRT","","True","","","list like error","string_list","","","","" +"programPhase","A label noting which AMP RA/SLE program phase generated the data.","I, II","","True","","","list like error","string_list","","","","" +"dataType","High-level classification of the type of data contained in the file, loosely related to the experimental method or biological entity that is being profiled. Select all that apply using a comma-delimited list, though in most cases only a single label is expected. For multimodal datasets with concomitant profiling of biospecimen include 'multimodal'.","cytometry, epigenomics, genomics, histology, immune repertoire profiling, immunostaining, lipidomics, metabolomics, microbiome, multimodal, proteomics, transcriptomics","","True","","","list like error","string_list","","","","" +"dataSubtype","General classification to differentiate between omics profiling modalities. If N/A please select 'none'. Multiple selections can be provided in a comma-delimited list, however this is largely only expected in the context of Datasets and files that contain integrated experimental data spanning multiple types.","bulk, none, pseudobulk, single-cell, single-nucleus, spatial","","True","","","list like error","string_list","","","","" +"10xProbeSetReference","Name of probe set used in 10x Chromium Flex, Xenium, or Visium data. 
If custom modified probe set was used the probe reference should be included as metadata accompanying the experimental data files.","Flex Human Transcriptome Probe Set v1.0.1, Flex Human Transcriptome Probe Set v1.1.0, Visium Human Transcriptome v1, Visium Human Transcriptome v2, custom probe set","","True","","","","string","","","","" +"specimenModality","Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens","multispecimen, single specimen, unknown","","True","","","","string","","","","" +"individualID","Unique identifier assigned to each study participant. For multi-specimen data files provide all IDs in a comma-separated list.","","","True","","","#ClinicalMetadataTemplate unique error^^#BiospecimenMetadataTemplate str^^list like error","string_list","","","","" +"biospecimenID","A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.","","","True","","","#BiospecimenMetadataTemplate unique error^^list like error","string_list","","","","" +"parentBiospecimenID","The biospecimenID associated with the originating biospecimen for derived or child biospecimens.","","","False","","","","string","","","","" +"biospecimenType","A label indicating the biological material collected for experimentation and data collection. 
Where applicable, provide all types in a comma-separated list.","PBMCs, cell line, fibroblast-like synoviocyte, kidney biopsy, none, plasma, primary cell culture, saliva, salivary gland, serum, skin biopsy, skin swab, stool, suction blister cells, suction blister fluid, synovial fluid, synovial tissue, total leukocytes, urine, uvea, whole blood","","True","","","#BiospecimenMetadataTemplate str^^#InVitroBiospecimenMetadataTemplate^^list like error","string_list","","","","" +"primaryCellSource","A label indicating the biological source material from which a primary cell culture was derived.","PBMCs, kidney, pannus-derived dermis, pannus-derived epidermis, salivary gland, synovial tissue, total leukocytes, urine, uvea, whole blood","","True","","","","string","","","","" +"metadataType","A label further classifying the content of metadata resource.","assay, biospecimen, cell coordinates, data dictionary, file manifest, medication, other, phenotype, protocol, single-cell metadata, target panel, template, tissue microarray map, tissue multiarray map, user manual","","True","","","","string","","","","" +"codingLanguage","The coding, aka programming, language(s) contained in the file marked with `resourceType = code`. 
Select all that apply.","C, C#, C++, Fortan, Java, Julia, Matlab, Python, R, Ruby, SAS","","True","","","","string","","","","" +"visitID","Ordinal ID distinguishing different patient visits.","","","True","","","","string","","","","" +"softwareAndVersion","Relevant software and version used to generate the data file.","BD FACSDiva 8.0.1, Cell Ranger 9.0.1, Cell Ranger ATAC v1.1.0, Cell Ranger v3.0.0, Cell Ranger v3.0.1, Cell Ranger v3.0.2, Cell Ranger v3.1.0, Cell Ranger v4.0.0, Cell Ranger v5.0.0, Cell Ranger v5.0.1, Cell Ranger v6.0.0, Cell Ranger v6.0.1, Cell Ranger v6.0.2, Cell Ranger v6.1.0, Cell Ranger v6.1.1, Cell Ranger v6.1.2, Cell Ranger v7.0.0, Cell Ranger v7.0.1, Cell Ranger v7.1.0, Cell Ranger v7.2.0, Cell Ranger v8.0.0, Cell Ranger v8.0.1, Cell Ranger v9.0.0, Space Ranger 3.0.0, Space Ranger 3.0.1, Space Ranger 3.1.0, Space Ranger 3.1.1, Space Ranger 3.1.2, Space Ranger 3.1.3, demuxlet","","False","","","","string","","","","" +"targetPanel","A unique or established human-readable name assigned to the panel of targets profiled in the experiment. For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.","","","True","","","","string","","","","" +"targetPanelSize","The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).","","","False","","","int error","integer","","","","" +"nucleicAcidSource","The source of the nucleic acid used as input for sequencing library fragments. 
Select all that apply, though in most cases only a single label is expected.","BCR mRNA, CRISPR protospacer feature barcode, TCR mRNA, Tn5-accessible gDNA, antigen capture barcode, gDNA, globin-depleted RNA, intracellular protein feature barcode, multiplexing oligo, poly(A) RNA, rRNA-depleted RNA, surface protein feature barcode","","True","","","list like error","string_list","","","","" +"biospecimenSubtype","Biospecimen status before sample is processed into a scRNA-seq library. Several scRNA-seq technologies support a variety of sample processing methods which can introduce sources of technical variation.","FFPE tissue, PFA-fixed tissue, cell or tissue lysate, cell suspension, flow-sorted cells, fresh tissue, frozen tissue, nuclei suspension, supernatant","","False","","","","string","","","","" +"cellRangerOutput","10x Genomics Cell Ranger software outputs several different count results and formats, some with different processing applied. This label distinguishes between these types and is particularly helpful when multiple files are uploaded with the same name, e.g., barcodes.tsv.gz","Not Applicable, filtered MEX, filtered_feature_bc_matrix, filtered_peak_bc_matrix, raw MEX, raw_feature_bc_matrix, raw_peak_bc_matrix","","True","","","","string","","","","" +"alignmentReference","The genomic/transcriptomic reference against which reads were aligned.","10x Cell Ranger Human GRCh38 2020-A, 10x Cell Ranger Human GRCh38 2024-A, GRCh38, modified GRCh38, unknown, vdj_GRCh38_alts_ensembl-4.0.0","","True","","","","string","","","","" +"libraryPrepMethod","Sequencing library preparation method or kit used to create the library. 
If no commercially available kit was used, please select 'in-house library prep'.","10x Chromium Fixed RNA Human Transcriptome, 10x Chromium GEM-X Single Cell 3' v4, 10x Chromium GEM-X Single Cell 5' v3, 10x Chromium Next GEM Single Cell 3', 10x Chromium Next GEM Single Cell 3' 3.1, 10x Chromium Next GEM Single Cell 5' v1.1, 10x Chromium Next GEM Single Cell 5' v2, 10x Chromium Next GEM Single Cell ATAC v2, 10x Chromium Single Cell Human BCR, 10x Chromium Single Cell Human TCR, 10x GEM-X Flex Gene Expression Human, 10x GEM-X Universal 5' Gene Expression v3, CEL-Seq2, Chromium Next GEM Single Cell ATAC v1.1, Fluidigm C1 HT, NEBNext Human Immune Sequencing Kit, NEBNext Ultra II Directional RNA Library, Nextera XT, Nextera XT DNA, QIAseq miRNA Library, SMART-Seq Human BCR with UMI, SMART-Seq Human TCR with UMI, SMART-Seq v4 Ultra Low Input RNA, SMARTer Stranded Total RNA v2, Takara Human BCR profiling for Illumina, Takara Human TCR profiling for Illumina, Takara Human TCRv2 profiling for Illumina, Takara Human scTCR profiling for Illumina, TruSeq Stranded mRNA, custom DASH-treatment, in-house library prep","","True","","","","string","","","","" +"datasetType","High-level classification of dataset entity distinguishing between datasets compiled for a specific publication or as a general data resource.","experimental, publication","","True","","","","string","","","","" +"acknowledgmentStatement","A Dataset-specific attribute specifying the path to the wiki subpage within the ARK Portal - backend project that contains the acknowledgement statement that must be included in publications using data from the given dataset as a stipulation of the conditions of use.","syn26710600/wiki/619685","","True","","","","string","","","","" +"ARKRelease","A Dataset-specific attribute specifying the ARK Portal release in which this dataset was first made available to the public.","1.0, 2.0, 2024.06.R1, 2024.07.R1, 2024.08.R1, 2024.09.R1, 2024.10.R1, 2024.12.R1, 2025.01.R1, 2025.02.R1, 
2025.03.R1, 2025.04.R1, 2025.05.R1, 2025.06.R1, 2025.07.R1, 2025.08.R1, 2025.09.R1, 2025.10.R1, 2025.11.R1, 2025.12.R1","","True","","","","string","","","","" +"RObjectClass","Rds files store R objects, one per file. This label details the class of the R object saved to the Rds file or other similar file types.","ROCR prediction.object, Seurat object, SummarizedExperiment, Symphony reference, data.frame, list, matrix, sparse matrix, vector","","False","","","","string","","","","" +"diagnosis","A high-level classifier indicating the disease status of an individual.","At-Risk RA, Not Applicable, OA, RA, SLE, Sjogren's disease, control, cutaneous lupus erythematosus, dermatomyositis, discoid lupus erythematosus, lupus nephritis, psoriasis, psoriatic arthritis, scleroderma, unknown, vitiligo","","True","","","list like error","string_list","","","","" +"libraryID","A library label or name, unique within an experiment, used to distinguish sequencing libraries.","","","True","","","","string","","","","" +"associatedCodeURL","A URL to the repository where associated code is available.","","","False","","","","string","uri","","","" +"FMAID","Foundational Model of Anatomy ontology ID corresponding to the anatomical site and origin of the biospecimen. This attribute is assigned by ARK Portal data managers.","","","True","","https://bioportal.bioontology.org/ontologies/FMA/?p=summary","int error","integer","","","","" +"skinSiteStatus","Disease manifestation status of skin biospecimen.","healthy control, lesional, lesional proximal, non-lesional","","True","","","","string","","","","" +"salivaCollectionProcedure","Classification of saliva collection procedure. 
This is a conditionally dependent attribute triggered for saliva `biospecimenType`.","stimulated, unstimulated","","True","","","","string","","","","" +"synovialCollectionProcedure","Classification of procedure for synovial tissue collection.","arthroplasty, biopsy, synovectomy, unknown","","True","","","","string","","","","" +"publicationType","General classification of publication.","correction, peer-reviewed, pre-print","","True","","","","string","","","","" +"title","Title of the publication.","","","True","","","","string","","","","" +"journal","Journal in which the publication was released","","","True","","","","string","","","","" +"publicationDate","The publication date extracted from PubMed database","","","True","","","","string","","","","" +"DOI","Digital object identifier","","","True","","","","string","uri-reference","","","" +"anatomicalSite","The anatomical site, i.e., location on or within the body, from which the biospecimen was collected. This attribute is required depending on the value selected for biospecimenType.","left 2nd MCP joint, left ankle joint, left hip joint, left knee joint, left wrist joint, other site, right 1st MTP joint, right 2nd MCP joint, right 2nd MTP joint, right 3rd MCP joint, right ankle joint, right hip joint, right knee joint, right wrist joint, unknown","","True","","","","string","","","","" +"vitiligoPattern","A high-level classification of vitiligo based on the distribution and patterning of lesions across the body.","mixed, non-segmental, segmental, unclassified","","False","","","","string","","","","" +"vitiligoPhenotype","Classification of vitiligo lesions which correlate with autoimmune activity and result in specific skin and depigmentation manifestations at the lesion site. If N/A please select 'none'. 
Multiple selections can be provided in a comma-delimited list.","active, confetti, inflammatory, none, trichrome","","True","","","list like error","string_list","","","","" +"psoriasisType","General type classification of psoriasis disease manifestation.","erythrodermic, guttate, inverse, plaque, pustular","","False","","","","string","","","","" +"ageDiagnosis","Age at which subject was diagnosed with `diagnosis`. If providing this value be sure the unit matches that used for `age`.","","","False","","","num error","number","","","","" +"age","Age at which subject was enrolled in study or age at corresponding visit and data collection event. If value unknown, enter '-1'.","","","True","","","num error","number","","","","" +"ageUnits","The unit of measure used for `age` and `ageDiagnosis`","months, years","","True","","","","string","","","","" +"sex","A textual description of a person's sex at birth.","female, intersex, male, unknown","","True","","","","string","","","","" +"height","Standing height of subject.","","","True","","","num error","number","","","","" +"race","A textual description of a person's race.","American Indian or Alaska Native, Asian, Black or African American, Hispanic, Mixed Race, Native Hawaiian or Other Pacific Islander, White, other, unknown","","False","","","","string","","","","" +"ethnicity","The ethnicity of a person.","Hispanic or Latino, Not Hispanic or Latino, unknown","","False","","","","string","","","","" +"heightUnits","Unit of measure of value provided for `height`.","centimeters, feet, inches, meters","","True","","","","string","","","","" +"weight","Weight of subject. 
If value unknown, enter '-1'.","","","True","","","num error","number","","","","" +"weightUnits","Abbreviated unit of measure of value provided for `weight`.","g, kg, lb, oz","","True","","","","string","","","","" +"comorbidities","Any diseases or medical condition that is simultaneously present in addition to `diagnosis`.","Hashimoto's Thyroiditis, autoimmune thyroid disease, cardiovascular disease, diabetes, inflammatory bowel disease, multiple sclerosis, other, psoriasis, psoriatic arthritis, pulmonary disease, rheumatoid arthritis, systemic lupus erythematosus","","False","","","list like error","string_list","","","","" +"diabetesType","Type of diabetes mellitus.","gestational, type 1, type 2, unknown","","False","","","","string","","","","" +"VIDA","Vitiligo Disease Activity Score, a six-point scale that evaluates the activity of vitiligo","","","False","","","","string","","","","" +"VASI","Total body Vitiligo Area Severity Index, a measure of the percentage of vitiligo involvement across the body calculated in terms of hand units.","","","False","","","","string","","","","" +"VETI","Vitiligo Extent Tensity Index measures the extent of vitiligo by a numerical score and combines analysis of extensity and severity of vitiligo to produce a constant and reproducible number.","","","False","","https://doi.org/10.5826/dpc.0404a18","num error","number","","","","" +"PASI","Psoriasis Area and Severity Index, is a measurement of the discoloration, thickness, scaling, and coverage of psoriasis plaques.","","","False","","","","string","","","","" +"CDASI","Cutaneous Dermatomyositis Disease Area and Severity Index, is a measurement used to characterize cutaneous dermatomyositis severity.","","","False","","","","string","","","","" +"inputCellCount","An estimate of the number of cells expected to be sequenced in a library. 
Software that processes single-cell sequencing data often includes options for users to specify this value to improve processing results.","","","True","","","int error","integer","","","","" +"totalReads","Total number of reads sequenced from the library.","","","True","","","int error","integer","","","","" +"FACSPopulation","A description of the marker gating strategy used to derive the population of cells with FACS.","","","True","","","","string","","","","" +"cellType","The cell type name from Cell Ontology for the corresponding CL identifier.","","","False","","","","string","","","","" +"userDefinedCellType","User-defined label of a cell type. Contributors are provided this option to use preferred or custom cell type labels that may vary from options and standards set by Cell Ontology.","","","False","","","","string","","","","" +"krennInflammatory","A standardized, semi-quantitative measure of the degree of inflammatory infiltrate in synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"krennLining","A standardized, semi-quantitative measure of the degree of hyperplasia/enlargement of the synovial lining layer of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"krennStroma","A standardized, semi-quantitative measure of stromal cell density of synovial specimen. 
This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"krennSynovitisScore","The Krenn Synovitis Score (KSS) is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"datasetStatus","A categorical label indicating the status of an ARK Portal dataset. This is applied to improve downstream management of datasets as well as various ETL workflows.","deprecated, released, test, under peer review, unreleased","","True","","","","string","","","","" +"sampleProcessingBatch","A label indicating batching of sample processing or preparation that occurs prior to data collection.","","","False","","","","string","","","","" +"dataCollectionBatch","A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.","","","False","","","","string","","","","" +"metadataStandards","Metadata standards used to generate the metadata","ARK data model, user-defined","","False","","","","string","","","","" +"skinSunExposure","For skin-based biospecimen, this attribute indicates whether the sample was collected from an anatomical site that does or does not receive routine sun exposure.","not sun exposed, sun exposed","","False","","","","string","","","","" +"sequencingSpikeIn","Pre-made sequencing libraries may be added to your sequencing run to improve sequencing results. 
If such a 'spike-in' library was used, this attribute specifies the name of the library and the percentage of the spike-in library used in the sequencing run.","","","False","","","","string","","","","" +"librarySpikeIn","Pre-made collections of nucleic acid fragments can be added to samples during the library construction process to improve downstream quantification and statistical analyses. If such a 'spike-in' was used, this attribute specifies the name and manufacturer of the spike-in.","","","False","","","","string","","","","" +"readLength","The number of base pairs (bp) sequenced for reads in a fastq file.","","","False","","","int error","integer","","","","" +"plateID","An identifier assigned to a multi-well plate. Certain data types and assays profile samples using multi-well plates. Knowing which samples were profiled on each plate is important for establishing sample provenance, finding the right data files for a specific set of samples, as well as downstream exploratory data analysis and QC work particularly regarding identification and correction of batch effects.","","","False","","","","string","","","","" +"processedDataType","A label used for file annotations to provide a brief description of the processed data file.","barcode counts, differential expression results, epigenomic peaks, gene counts","","False","","","list like error","string_list","","","","" +"eventCount","The total number of events detected and captured in the corresponding FCS file. In cytometry, an event corresponds to particles detected and measured by the instrument. This number corresponds to the number of rows in the FCS file and is expected to be a whole integer number.","","","False","","","int error","integer","","","","" +"notes","Please use this attribute to capture pertinent details not otherwise captured. 
This is an unstructured text entry, please be concise.","","","False","","","","string","","","","" +"species","The genus species of sample or subject origin.","Homo sapiens","","True","","","","string","","","","" +"treatment","A short descriptive label indicating what experimental treatments have been applied to a biospecimen before data capture. Please include the word 'control' to indicate specific treatments that are intended to serve as a control group.","","","True","","","","string","","","","" +"treatmentTimepoint","Where applicable, specify the timepoint relative to treatment to distinguish different sets of samples that have undergone the same treatment but for different lengths of time. REQUIRED: please specify the unit of time, e.g., secs, mins, hrs, days, etc.","","","False","","","","string","","","","" +"slideID","A distinct label or name, unique within an experiment, assigned to an imaging slide.","","","True","","","","string","","","","" +"altSampleID","An alternate identifier for a sample. With some assays there can be default or alternate sample identifiers entered into the data collection software. If you will be uploading data collected with an alternate identifier please provide that here. In some cases, the ARK BDM team will use this field to capture original but out-dated identifiers for samples.","","","False","","","","string","","","","" +"sampleCollectionBatch","A label indicating batching of sample collection or experiment execution that occurs prior to data collection.","","","False","","","","string","","","","" +"associatedAccession","This is a File and Dataset annotation attribute indicating additional accessions (i.e., unique identifiers) associated with the data when the data has also been submitted to or can be found in other repositories such as GEO, SRA, dbGaP, etc.","","","False","","","list like error","string_list","","","","" +"percentCellViability","A measure of the proportion of viable cells within a cell suspension. 
Scale is 0-100.","","","True","","","inRange 50 100 error","integer","","50","100","" +"sequencingSaturation","A measure of the fraction of library complexity that was sequenced in a library. This metric quantifies the fraction of reads originating from an already-observed UMI. More specifically, this is the fraction of confidently mapped, valid cell-barcode, valid UMI reads that are non-unique. Scale is 0-1.","","","False","","https://kb.10xgenomics.com/hc/en-us/articles/115005062366-What-is-sequencing-saturation","inRange 0 1 error","number","","0","1","" +"publicationSynID","The synID of the corresponding Synapse entity that stores metadata about the publication. This is used to differentiate publication-specific files, often consisting of level 4 processed data and expanded subject metadata, in a publication dataset that also includes raw or minimally processed files from experimental datasets. This provides an easy way to distinguish and select for the publication-specific data from which the research findings were derived. When this attribute is used to annotate a Dataset it serves as a way to directly link the Dataset entity with the publication metadata stored in Synapse.","","","False","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"associatedDataset","The synID of a Dataset entity. This serves to link other Synapse entities to Dataset entities. When used to annotate a publication Dataset this attribute should include the synID for an experimental Datasets from which the publication data was derived. Multiple synID can be specified using a comma-delimited list.","","","False","","","list like::regex search ^syn[0-9]{8} error","string_list","","","","^syn[0-9]{8}" +"custom10xProbeSetSynID","If custom modified probe sets were used for collecting 10x Chromium Flex scRNA-seq data, then the probe reference files should be grouped as a zip or tar archive and uploaded as metadata. 
This attribute links the experimental data back to the probe set archive file via a synapse ID.","","","True","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"targetPanelSynID","In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. This attribute links experimental data files to the target panel metadata via the synapse ID of that file.","","","True","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"datasetDescription","A Dataset-specific attribute specifying the synID of the folder that contains a wiki write-up of the dataset description. This wiki content will be surfaced on the ARK Portal frontend site.","","","True","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"DOID","Disease ontology identifier associated with `diagnosis`. Attribute values are applied by ARK Portal data managers.","","","True","","","regex search ^DOID error","string","","","","^DOID" +"BRENDA","BRENDA Tissue and Enzyme Source Ontology ID (BTO) corresponding to the `biospecimenType`. 
Attribute values are applied by ARK Portal data managers.","","","True","","https://bioportal.bioontology.org/ontologies/BTO","regex search ^BTO error","string","","","","^BTO" +"year","Year (YYYY) in which the paper was published.","","","True","","","regex search [1-2][0-9]{3} error","string","","","","[1-2][0-9]{3}" +"PMID","PubMed(R) Identifier","","","True","","","regex search ^PMID error","string","","","","^PMID" +"ImmPortAccession","Accession to corresponding information in ImmPort.","","","False","","","regex search ^SDY error","string","","","","^SDY" +"cellOntologyID","Cell Ontology CL identifier that best describes a biospecimen used to generate data, e.g., CL:0000084 for T cell.","","","False","","","regex search ^CL: error","string","","","","^CL:" +"PMCID","PubMed Central Identifier, formatted as a compact URI string that will automatically resolve into a URL based on the Synapse bioregistry; e.g., pmc:PMCxxxxxxxx","","","False","","","regex search ^pmc:PMC[0-9]{8} error","string","","","","^pmc:PMC[0-9]{8}" diff --git a/model_contexts/fcs/ark.fcs_context.csv b/model_contexts/fcs/ark.fcs_context.csv index 40713425..0c2518e0 100644 --- a/model_contexts/fcs/ark.fcs_context.csv +++ b/model_contexts/fcs/ark.fcs_context.csv @@ -1,11 +1,11 @@ -"Attribute","Description","Valid Values","DependsOn","Properties","Required","Parent","DependsOn Component","Source","Validation Rules","columnType" -"FCS File Annotation Template","A template outlining metadata to be collected and applied to FCS file entities as annotations in Synapse.","","Component, fileFormat, specimenModality, assay, eventCount","","","","","","","" -"BDM FCS File Annotations","A template outlining metadata to be collected and applied to FCS file entities as annotations in Synapse by BDM team as part of data release process.","","Component, fileFormat, resourceType, assay, specimenModality, eventCount, parentBiospecimenID, biospecimenType, biospecimenSubtype, dataType, dataSubtype, diagnosis, 
platform, sampleProcessingBatch, dataCollectionBatch, program, project, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","" -"multispecimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","sampleProcessingBatch, dataCollectionBatch","","","","","","","" -"single specimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","biospecimenID, individualID","","","","","","","" -"resourceType","","experimental data","","","TRUE","","","","","string" -"fileFormat","","fcs","","","","","","","","string" -"assay","The technology used to generate the data in this file.","CyTOF, flow cytometry","","","TRUE","","","","","string" -"platform","The specific version (manufacturer, model, etc.) of a technology that is used to carry out a laboratory or computational experiment.","Cytek Aurora, Cytek Aurora Evo, BD FACSDiscover S8, BD FACSymphony S6, BD FACSMelody, BD FACSAria III, BD FACSCanto, BD FACSLyric Clinical, BD FACSCanto II, BD FACSDiscover A8, Thermo Fisher Attune Xenith, Thermo Fisher Attune CytPix, Thermo Fisher Attune NxT, Sony MA900, BD LSRFortessa, unknown","","","TRUE","","","","","string" -"AMP RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","" -"AMP AIM","Accelerating Medicines Partnership Autoimmune and Immune-Mediated Diseases. 
Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","" +"Attribute","Description","Valid Values","DependsOn","Required","Parent","Source","Validation Rules","columnType","Format","Minimum","Maximum","Pattern" +"FCS File Annotation Template","A template outlining metadata to be collected and applied to FCS file entities as annotations in Synapse.","","Component, fileFormat, specimenModality, assay, eventCount","","","","","","","","","" +"BDM FCS File Annotations","A template outlining metadata to be collected and applied to FCS file entities as annotations in Synapse by BDM team as part of data release process.","","Component, fileFormat, resourceType, assay, specimenModality, eventCount, parentBiospecimenID, biospecimenType, biospecimenSubtype, dataType, dataSubtype, diagnosis, platform, sampleProcessingBatch, dataCollectionBatch, program, project, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","","","" +"multispecimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","sampleProcessingBatch, dataCollectionBatch","","","","","","","","","" +"single specimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","biospecimenID, individualID","","","","","","","","","" +"resourceType","","experimental data","","TRUE","","","","string","","","","" +"fileFormat","","fcs","","","","","","string","","","","" +"assay","The technology used to generate the data in this file.","CyTOF, flow cytometry","","TRUE","","","","string","","","","" +"platform","The specific version (manufacturer, model, etc.) 
of a technology that is used to carry out a laboratory or computational experiment.","Cytek Aurora, Cytek Aurora Evo, BD FACSDiscover S8, BD FACSymphony S6, BD FACSMelody, BD FACSAria III, BD FACSCanto, BD FACSLyric Clinical, BD FACSCanto II, BD FACSDiscover A8, Thermo Fisher Attune Xenith, Thermo Fisher Attune CytPix, Thermo Fisher Attune NxT, Sony MA900, BD LSRFortessa, unknown","","TRUE","","","","string","","","","" +"AMP RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","","","" +"AMP AIM","Accelerating Medicines Partnership Autoimmune and Immune-Mediated Diseases. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","","","" diff --git a/model_contexts/fcs/ark.fcs_model.csv b/model_contexts/fcs/ark.fcs_model.csv index f8e008d6..a90e95c0 100644 --- a/model_contexts/fcs/ark.fcs_model.csv +++ b/model_contexts/fcs/ark.fcs_model.csv @@ -1,115 +1,115 @@ -"Attribute","Description","Valid Values","DependsOn","Properties","Required","Parent","DependsOn Component","Source","Validation Rules","columnType" -"FCS File Annotation Template","A template outlining metadata to be collected and applied to FCS file entities as annotations in Synapse.","","Component, fileFormat, specimenModality, assay, eventCount","","","","","","","" -"BDM FCS File Annotations","A template outlining metadata to be collected and applied to FCS file entities as annotations in Synapse by BDM team as part of data release process.","","Component, fileFormat, resourceType, assay, specimenModality, eventCount, parentBiospecimenID, biospecimenType, biospecimenSubtype, dataType, dataSubtype, diagnosis, platform, sampleProcessingBatch, dataCollectionBatch, program, project, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","" 
-"multispecimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","sampleProcessingBatch, dataCollectionBatch","","","","","","","" -"single specimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","biospecimenID, individualID","","","","","","","" -"resourceType","High-level classification of the file content","experimental data","","","TRUE","","","","","string" -"fileFormat","Standard file format name or file extension","fcs","","","","","","","","string" -"assay","The technology used to generate the data in this file.","CyTOF, flow cytometry","","","TRUE","","","","","string" -"platform","The specific version (manufacturer, model, etc.) of a technology that is used to carry out a laboratory or computational experiment.","Cytek Aurora, Cytek Aurora Evo, BD FACSDiscover S8, BD FACSymphony S6, BD FACSMelody, BD FACSAria III, BD FACSCanto, BD FACSLyric Clinical, BD FACSCanto II, BD FACSDiscover A8, Thermo Fisher Attune Xenith, Thermo Fisher Attune CytPix, Thermo Fisher Attune NxT, Sony MA900, BD LSRFortessa, unknown","","","TRUE","","","","","string" -"AMP RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","" -"AMP AIM","Accelerating Medicines Partnership Autoimmune and Immune-Mediated Diseases. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","" -"Component","A high-level attribute for grouping attributes into templates.","","","","True","","","","","string" -"publicationSynID","The synID of the corresponding Synapse entity that stores metadata about the publication. 
This is used to differentiate publication-specific files, often consisting of level 4 processed data and expanded subject metadata, in a publication dataset that also includes raw or minimally processed files from experimental datasets. This provides an easy way to distinguish and select for the publication-specific data from which the research findings were derived. When this attribute is used to annotate a Dataset it serves as a way to directly link the Dataset entity with the publication metadata stored in Synapse.","","","","False","","","","regex search ^syn[0-9]{8} error","string" -"associatedDataset","The synID of a Dataset entity. This serves to link other Synapse entities to Dataset entities. When used to annotate a publication Dataset this attribute should include the synID for an experimental Datasets from which the publication data was derived. Multiple synID can be specified using a comma-delimited list.","","","","False","","","","list like::regex search ^syn[0-9]{8} error","string" -"program","Name of the funding program that supported the generation of data and associated files","AMP AIM, AMP RA/SLE, Community Contribution","","","True","","","","","string" -"project","A sub-level attribute of `program` specifying a research initiative working to investigate particular hypotheses.","AIM for RA, ELLIPSS, LOCKIT, RA, SLE, STAMP, UMass V-CoRT","","","True","","","","list like error","string" -"programPhase","A label noting which AMP RA/SLE program phase generated the data.","I, II","","","True","","","","list like error","string" -"dataType","High-level classification of the type of data contained in the file, loosely related to the experimental method or biological entity that is being profiled. Select all that apply using a comma-delimited list, though in most cases only a single label is expected. 
For multimodal datasets with concomitant profiling of biospecimen include 'multimodal'.","cytometry, epigenomics, genomics, histology, immune repertoire profiling, immunostaining, lipidomics, metabolomics, microbiome, multimodal, proteomics, transcriptomics","","","True","","","","list like error","string" -"dataSubtype","General classification to differentiate between omics profiling modalities. If N/A please select 'none'. Multiple selections can be provided in a comma-delimited list, however this is largely only expected in the context of Datasets and files that contain integrated experimental data spanning multiple types.","bulk, none, pseudobulk, single-cell, single-nucleus, spatial","","","True","","","","list like error","string" -"dataLevel","Level of data processing applied to file. Levels refer to pre-defined standards of processing for the given assay.","1, 2, 3, 4, 5","","","True","","","","","string" -"10xProbeSetReference","Name of probe set used in 10x Chromium Flex, Xenium, or Visium data. If custom modified probe set was used the probe reference should be included as metadata accompanying the experimental data files.","Flex Human Transcriptome Probe Set v1.0.1, Flex Human Transcriptome Probe Set v1.1.0, Visium Human Transcriptome v1, Visium Human Transcriptome v2, custom probe set","","","True","","","","","string" -"custom10xProbeSetSynID","If custom modified probe sets were used for collecting 10x Chromium Flex scRNA-seq data, then the probe reference files should be grouped as a zip or tar archive and uploaded as metadata. 
This attribute links the experimental data back to the probe set archive file via a synapse ID.","","","","True","","","","regex search ^syn[0-9]{8} error","string" -"specimenModality","Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens","multispecimen, single specimen, unknown","","","True","","","","","string" -"individualID","Unique identifier assigned to each study participant. For multi-specimen data files provide all IDs in a comma-separated list.","","","","True","","","","#ClinicalMetadataTemplate unique error^^#BiospecimenMetadataTemplate str^^list like error","string" -"biospecimenID","A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.","","","","True","","","","#BiospecimenMetadataTemplate unique error^^list like error","string" -"parentBiospecimenID","The biospecimenID associated with the originating biospecimen for derived or child biospecimens.","","","","False","","","","","string" -"biospecimenType","A label indicating the biological material collected for experimentation and data collection. 
Where applicable, provide all types in a comma-separated list.","PBMCs, cell line, fibroblast-like synoviocyte, kidney biopsy, none, plasma, primary cell culture, saliva, salivary gland, serum, skin biopsy, skin swab, stool, suction blister cells, suction blister fluid, synovial fluid, synovial tissue, total leukocytes, urine, uvea, whole blood","","","True","","","","#BiospecimenMetadataTemplate str^^#InVitroBiospecimenMetadataTemplate^^list like error","string" -"primaryCellSource","A label indicating the biological source material from which a primary cell culture was derived.","PBMCs, kidney, pannus-derived dermis, pannus-derived epidermis, salivary gland, synovial tissue, total leukocytes, urine, uvea, whole blood","","","True","","","","","string" -"metadataType","A label further classifying the content of metadata resource.","assay, biospecimen, cell coordinates, data dictionary, file manifest, medication, other, phenotype, protocol, single-cell metadata, target panel, template, tissue microarray map, tissue multiarray map, user manual","","","True","","","","","string" -"codingLanguage","The coding, aka programming, language(s) contained in the file marked with `resourceType = code`. 
Select all that apply.","C, C#, C++, Fortan, Java, Julia, Matlab, Python, R, Ruby, SAS","","","True","","","","","string" -"visitID","Ordinal ID distinguishing different patient visits.","","","","True","","","","","string" -"softwareAndVersion","Relevant software and version used to generate the data file.","BD FACSDiva 8.0.1, Cell Ranger 9.0.1, Cell Ranger ATAC v1.1.0, Cell Ranger v3.0.0, Cell Ranger v3.0.1, Cell Ranger v3.0.2, Cell Ranger v3.1.0, Cell Ranger v4.0.0, Cell Ranger v5.0.0, Cell Ranger v5.0.1, Cell Ranger v6.0.0, Cell Ranger v6.0.1, Cell Ranger v6.0.2, Cell Ranger v6.1.0, Cell Ranger v6.1.1, Cell Ranger v6.1.2, Cell Ranger v7.0.0, Cell Ranger v7.0.1, Cell Ranger v7.1.0, Cell Ranger v7.2.0, Cell Ranger v8.0.0, Cell Ranger v8.0.1, Cell Ranger v9.0.0, Space Ranger 3.0.0, Space Ranger 3.0.1, Space Ranger 3.1.0, Space Ranger 3.1.1, Space Ranger 3.1.2, Space Ranger 3.1.3, demuxlet","","","False","","","","","string" -"targetPanel","A unique or established human-readable name assigned to the panel of targets profiled in the experiment. For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.","","","","True","","","","","string" -"targetPanelSynID","In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. This attribute links experimental data files to the target panel metadata via the synapse ID of that file.","","","","True","","","","regex search ^syn[0-9]{8} error","string" -"targetPanelSize","The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. 
The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).","","","","False","","","","int error","integer" -"nucleicAcidSource","The source of the nucleic acid used as input for sequencing library fragments. Select all that apply, though in most cases only a single label is expected.","BCR mRNA, CRISPR protospacer feature barcode, TCR mRNA, Tn5-accessible gDNA, antigen capture barcode, gDNA, globin-depleted RNA, intracellular protein feature barcode, multiplexing oligo, poly(A) RNA, rRNA-depleted RNA, surface protein feature barcode","","","True","","","","list like error","string" -"biospecimenSubtype","Biospecimen status before sample is processed into a scRNA-seq library. Several scRNA-seq technologies support a variety of sample processing methods which can introduces sources of technical variation.","FFPE tissue, PFA-fixed tissue, cell or tissue lysate, cell suspension, flow-sorted cells, fresh tissue, frozen tissue, nuclei suspension, supernatant","","","False","","","","","string" -"cellRangerOutput","10x Genomics Cell Ranger software output several different counts results and formats, some with different processing applied. This label distinguishes between these types and is particularly helpful when multiple files are uploaded with the sample name, e.g., barcodes.tsv.gz","Not Applicable, filtered MEX, filtered_feature_bc_matrix, filtered_peak_bc_matrix, raw MEX, raw_feature_bc_matrix, raw_peak_bc_matrix","","","True","","","","","string" -"alignmentReference","The genomic/transcriptomic reference used for performing read alignment against.","10x Cell Ranger Human GRCh38 2020-A, 10x Cell Ranger Human GRCh38 2024-A, GRCh38, modified GRCh38, unknown, vdj_GRCh38_alts_ensembl-4.0.0","","","True","","","","","string" -"libraryPrepMethod","Sequencing library preparation method or kit used to create the library. 
If no commercially available kit was used, please select 'in-house library prep'.","10x Chromium Fixed RNA Human Transcriptome, 10x Chromium GEM-X Single Cell 3' v4, 10x Chromium GEM-X Single Cell 5' v3, 10x Chromium Next GEM Single Cell 3', 10x Chromium Next GEM Single Cell 3' 3.1, 10x Chromium Next GEM Single Cell 5' v1.1, 10x Chromium Next GEM Single Cell 5' v2, 10x Chromium Next GEM Single Cell ATAC v2, 10x Chromium Single Cell Human BCR, 10x Chromium Single Cell Human TCR, 10x GEM-X Flex Gene Expression Human, 10x GEM-X Universal 5' Gene Expression v3, CEL-Seq2, Chromium Next GEM Single Cell ATAC v1.1, Fluidigm C1 HT, NEBNext Human Immune Sequencing Kit, NEBNext Ultra II Directional RNA Library, Nextera XT, Nextera XT DNA, QIAseq miRNA Library, SMART-Seq Human BCR with UMI, SMART-Seq Human TCR with UMI, SMART-Seq v4 Ultra Low Input RNA, SMARTer Stranded Total RNA v2, Takara Human BCR profiling for Illumina, Takara Human TCR profiling for Illumina, Takara Human TCRv2 profiling for Illumina, Takara Human scTCR profiling for Illumina, TruSeq Stranded mRNA, custom DASH-treatment, in-house library prep","","","True","","","","","string" -"datasetType","High-level classification of dataset entity distinguishing between datasets compiled for a specific publication or as a general data resource.","experimental, publication","","","True","","","","","string" -"acknowledgmentStatement","A Dataset-specific attribute specifying the path to the wiki subpage within the ARK Portal - backend project that contains the acknowledgement statement that must be included in publications using data from the given dataset as a stipulation of the conditions of use.","syn26710600/wiki/619685","","","True","","","","","string" -"datasetDescription","A Dataset-specific attribute specifying the synID of the folder that contains a wiki write-up of the dataset description. 
This wiki content will be surfaced on the ARK Portal frontend site.","","","","True","","","","regex search ^syn[0-9]{8} error","string" -"ARKRelease","A Dataset-specific attribute specifying the ARK Portal release in which this dataset was first made available to the public.","1.0, 2.0, 2024.06.R1, 2024.07.R1, 2024.08.R1, 2024.09.R1, 2024.10.R1, 2024.12.R1, 2025.01.R1, 2025.02.R1, 2025.03.R1, 2025.04.R1, 2025.05.R1, 2025.06.R1, 2025.07.R1, 2025.08.R1, 2025.09.R1, 2025.10.R1, 2025.11.R1, 2025.12.R1","","","True","","","","","string" -"RObjectClass","Rds files store R objects, one per file. This label details the class of the R object saved to the Rds file or other similar file types.","ROCR prediction.object, Seurat object, SummarizedExperiment, Symphony reference, data.frame, list, matrix, sparse matrix, vector","","","False","","","","","string" -"diagnosis","A high-level classifier indicating the disease status of an individual.","At-Risk RA, Not Applicable, OA, RA, SLE, Sjogren's disease, control, cutaneous lupus erythematosus, dermatomyositis, discoid lupus erythematosus, lupus nephritis, psoriasis, psoriatic arthritis, scleroderma, unknown, vitiligo","","","True","","","","list like error","string" -"DOID","Disease ontology identifier associated with `diagnosis`. Attribute values are applied by ARK Portal data managers.","","","","True","","","","regex search ^DOID error","string" -"libraryID","A library label or name, unique within an experiment, used to distinguish sequencing libraries.","","","","True","","","","","string" -"associatedCodeURL","A URL to the repository where associated code is available.","","","","False","","","","","string" -"FMAID","Functional Model of Anatomy ontology ID corresponding to the anatomical site and origin of the biospecimen. 
This attribute is assigned by ARK Portal data managers.","","","","True","","","https://bioportal.bioontology.org/ontologies/FMA/?p=summary","int error","integer" -"BRENDA","BRENDA Tissue and Enzyme Source Ontology ID (BTO) corresponding to the `biospecimenType`. Attribute values are applied by ARK Portal data managers.","","","","True","","","https://bioportal.bioontology.org/ontologies/BTO","regex search ^BTO error","string" -"skinSiteStatus","Disease manifestation status of skin biospecimen.","healthy control, lesional, lesional proximal, non-lesional","","","True","","","","","string" -"salivaCollectionProcedure","Classification of saliva collection procedure. This is a conditionally dependent attribute triggered for saliva `biospecimenType`.","stimulated, unstimulated","","","True","","","","","string" -"synovialCollectionProcedure","Classification of procedure for synovial tissue collection.","arthroplasty, biopsy, synovectomy, unknown","","","True","","","","","string" -"publicationType","General classification of publication.","correction, peer-reviewed, pre-print","","","True","","","","","string" -"title","Title of the publication.","","","","True","","","","","string" -"journal","Journal in which the publication was released","","","","True","","","","","string" -"year","Year (YYYY) in which the paper was published.","","","","True","","","","regex search [1-2][0-9]{3} error","string" -"publicationDate","The publication date extracted from PubMed database","","","","True","","","","","string" -"PMID","PubMed(R) Identifier","","","","True","","","","regex search ^PMID error","string" -"DOI","Digital object identifier","","","","True","","","","","string" -"anatomicalSite","The anatomical site, i.e., location on or within the body, from which the biospecimen was collected. 
This attribute is required depending on the value selected for biospecimenType.","left 2nd MCP joint, left ankle joint, left hip joint, left knee joint, left wrist joint, other site, right 1st MTP joint, right 2nd MCP joint, right 2nd MTP joint, right 3rd MCP joint, right ankle joint, right hip joint, right knee joint, right wrist joint, unknown","","","True","","","","","string" -"vitiligoPattern","A high-level classification of vitiligo based on the distribution and patterning of lesions across the body.","mixed, non-segmental, segmental, unclassified","","","False","","","","","string" -"vitiligoPhenotype","Classification of vitiligo lesions which correlate with autoimmune activity and result in specific skin and depigmentation manifestations at the lesion site. If N/A please select 'none'. Multiple selections can be provided in a comma-delimited list.","active, confetti, inflammatory, none, trichrome","","","True","","","","list like error","string" -"psoriasisType","General type classification of psoriasis disease manifestation.","erythrodermic, guttate, inverse, plaque, pustular","","","False","","","","","string" -"ageDiagnosis","Age at which subject was diagnosed with `diagnosis`. If providing this value be sure the unit matches that used for `age`.","","","","False","","","","num error","number" -"age","Age at which subject was enrolled in study or age at corresponding visit and data collection event. 
If value unknown, enter '-1'.","","","","True","","","","num error","number" -"ageUnits","The unit of measure used for `ageEnrollment` and `ageDiagnosis`","months, years","","","True","","","","","string" -"sex","A textual description of a person's sex at birth.","female, intersex, male, unknown","","","True","","","","","string" -"height","Standing height of subject.","","","","True","","","","num error","number" -"race","A textual description of a person's race.","American Indian or Alaska Native, Asian, Black or African American, Hispanic, Mixed Race, Native Hawaiian or Other Pacific Islander, White, other, unknown","","","False","","","","","string" -"ethnicity","The ethnicity of a person.","Hispanic or Latino, Not Hispanic or Latino, unknown","","","False","","","","","string" -"heightUnits","Unit of measure of value provided for `height`.","centimeters, feet, inches, meters","","","True","","","","","string" -"weight","Weight of subject. If value unknown, enter '-1'.","","","","True","","","","num error","number" -"weightUnits","Abbreviated unit of measure of value provided for `weight`.","g, kg, lb, oz","","","True","","","","","string" -"comorbidities","Any diseases or medical condition that is simultaneously present in addition to `diagnosis`.","Hashimoto's Thyroiditis, autoimmune thyroid disease, cardiovascular disease, diabetes, inflammatory bowel disease, multiple sclerosis, other, psoriasis, psoriatic arthritis, pulmonary disease, rheumatoid arthritis, systemic lupus erythematosus","","","False","","","","list like error","string" -"diabetesType","Type of diabetes mellitus.","gestational, type 1, type 2, unknown","","","False","","","","","string" -"VIDA","Vitiligo Disease Activity Score, a six-point scale that evaluates the activity of vitiligo","","","","False","","","","","string" -"VASI","Total body Vitiligo Area Severity Index, a measure of the percentage of vitiligo involvement across the body calculated in terms of hand 
units.","","","","False","","","","","string" -"VETI","Vitiligo Extent Tensity Index measures the extent of vitiligo by a numerical score and combines analysis of extensity and severity of vitiligo to produce a constant and reproducible number.","","","","False","","","https://doi.org/10.5826/dpc.0404a18","num error","number" -"PASI","Psoriasis Area and Severity Index, is a measurement of the discoloration, thickness, scaling, and coverage of psoriasis plaques.","","","","False","","","","","string" -"CDASI","Cutaneous Dermatomyositis Disease Area and Severity Index, is a measurement used to characterize cutaneous dermatomyositis severity.","","","","False","","","","","string" -"percentCellViability","A measure of the proportion of viable cells within a cell suspension. Scale is 0-100.","","","","True","","","","inRange 50 100 error","integer" -"inputCellCount","An estimate of the number of cells expected to be sequenced in a library. Software that process single-cell sequencing data often include options for users to specify this value to improve processing results.","","","","True","","","","int error","integer" -"totalReads","Total number of reads sequenced from the library.","","","","True","","","","int error","integer" -"sequencingSaturation","A measure of the fraction of library complexity that was sequenced in a library. This metric quantifies the fraction of reads originating from an already-observed UMI. More specifically, this is the fraction of confidently mapped, valid cell-barcode, valid UMI reads that are non-unique. 
Scale is 0-1.","","","","False","","","https://kb.10xgenomics.com/hc/en-us/articles/115005062366-What-is-sequencing-saturation","inRange 0 1 error","number" -"ImmPortAccession","Accession to corresponding information in ImmPort.","","","","False","","","","regex search ^SDY error","string" -"FACSPopulation","A description of the marker gating strategy used to derive the population cells with FACS.","","","","True","","","","","string" -"cellOntologyID","Cell Ontology CL identifier that best describes a biopsecimen used to generate data, e.g., CL:0000084 for T cell.","","","","False","","","","regex search ^CL: error","string" -"cellType","The cell type name from Cell Ontology for the corresponding CL identifier.","","","","False","","","","","string" -"userDefinedCellType","User-defined label of a cell type. Contributors are provided this option to use preferred or custom cell type labels that may vary from options and standards set by Cell Ontology.","","","","False","","","","","string" -"krennInflammatory","A standardized, semi-quantitative measure of the degree of inflammatory infiltrate in synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"krennLining","A standardized, semi-quantitative measure of the degree of hyperplasia/enlargement of the synovial lining layer of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. 
If value unknown, use '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"krennStroma","A standardized, semi-quantitative measure of stromal cell density of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"krennSynovitisScore","The Krenn Synovitis Score (KSS) is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"datasetStatus","A categorical label indicating the status of an ARK Portal dataset. 
This is applied to improve downstream management of datasets as well as various ETL workflows.","deprecated, released, test, under peer review, unreleased","","","True","","","","","string" -"sampleProcessingBatch","A label indicating batching of sample processing or preparation that occurs prior to data collection.","","","","False","","","","","string" -"dataCollectionBatch","A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.","","","","False","","","","","string" -"metadataStandards","Metadata standards used to generate the metadata","ARK data model, user-defined","","","False","","","","","string" -"skinSunExposure","For skin-based biospecimen, this attribute indicates whether the sample was collected from an anatomical site that does or does not receive routine sun exposure.","not sun exposed, sun exposed","","","False","","","","","string" -"sequencingSpikeIn","Pre-made sequencing libraries may be added to your sequencing run to improve sequencing results. If such a 'spike-in' library was used, this attribute specifies the name of the library and the percentage of the spike-in library used in the sequencing run.","","","","False","","","","","string" -"librarySpikeIn","Pre-made collections of nucleic acid fragments can be added to samples during the library construction process to improve downstream quantification and statistical analyses. If such a 'spike-in' was used, this attribute specifies the name and manufacturer of the spike-in.","","","","False","","","","","string" -"readLength","The number of base pairs (bp) sequenced for reads in a fastq file.","","","","False","","","","int error","integer" -"plateID","An identifier assigned to a multi-well plate. Certain data types and assays profile samples using multi-well plates. 
Knowing which samples were profiled on each plate is important for establishing sample provenance, finding the right data files for a specific set of samples, as well as downstream exploratory data analysis and QC work particularly regarding identification and correction of batch effects.","","","","False","","","","","string" -"PMCID","Pubmed Central Identifier, formatted as a compact URI string that will automatically resolve into a URL based on the Synapse bioregistry; e.g., pmc:PMCxxxxxxxx","","","","False","","","","regex search ^pmc:PMC[0-9]{8} error","string" -"processedDataType","A label used for file annotations to provide a brief description of the processed data file.","barcode counts, differential expression results, epigenomic peaks, gene counts","","","False","","","","list like error","string" -"eventCount","The total number of events detected and captured in the corresponding FCS file. In cytometry, an event corresponds to particles detected and measured by the instrument. This number corresponds to the number of rows in the FCS file and is expected to be a whole integer number.","","","","False","","","","int error","integer" -"notes","Please use this attribute to captured pertinent details not otherwise captured. This is an unstructured text entry, please be concise.","","","","False","","","","","string" -"species","The genus species of sample or subject origin.","Homo sapiens","","","True","","","","","string" -"treatment","A short descriptive label indicating what experimental treatments have been applied to a biospecimen before data capture. Please include the word 'control' to indicate specific treatments that are intended to serve as a control group.","","","","True","","","","","string" -"treatmentTimepoint","Where applicable, specify the timepoint relative to treatment to distinguish different sets of samples that have undergone the same treatment but for different lengths of time. 
REQUIRED: please specify the unit of time, e.g., secs, mins, hrs, days, etc.","","","","False","","","","","string" -"slideID","A distinct label or name, unique within an experiment, assigned to an imaging slides.","","","","True","","","","","string" -"altSampleID","An alternate identifier for a sample. With some assays there can be default or alternate sample identifiers entered into the data collection software. If you will be uploading data collected with an alternate identifier please provide that here. In some cases, the ARK BDM team will use this field to capture original but out-dated identifiers for samples.","","","","False","","","","","string" -"sampleCollectionBatch","A label indicating batching of sample collection or experiment execution that occurs prior to data collection.","","","","False","","","","","string" -"associatedAccession","This is a File and Dataset annotation attribute indicating additional accessions (i.e., unique identifiers) associated with the data when the data has also been submitted to or can be found in other repositories such as GEO, SRA, dbGaP, etc.","","","","False","","","","list like error","string" +"Attribute","Description","Valid Values","DependsOn","Required","Parent","Source","Validation Rules","columnType","Format","Minimum","Maximum","Pattern" +"FCS File Annotation Template","A template outlining metadata to be collected and applied to FCS file entities as annotations in Synapse.","","Component, fileFormat, specimenModality, assay, eventCount","","","","","","","","","" +"BDM FCS File Annotations","A template outlining metadata to be collected and applied to FCS file entities as annotations in Synapse by BDM team as part of data release process.","","Component, fileFormat, resourceType, assay, specimenModality, eventCount, parentBiospecimenID, biospecimenType, biospecimenSubtype, dataType, dataSubtype, diagnosis, platform, sampleProcessingBatch, dataCollectionBatch, program, project, species, primaryCellSource, 
cellType, userDefinedCellType","","","","","","","","","" +"multispecimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","sampleProcessingBatch, dataCollectionBatch","","","","","","","","","" +"single specimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","biospecimenID, individualID","","","","","","","","","" +"resourceType","High-level classification of the file content","experimental data","","TRUE","","","","string","","","","" +"fileFormat","Standard file format name or file extension","fcs","","","","","","string","","","","" +"assay","The technology used to generate the data in this file.","CyTOF, flow cytometry","","TRUE","","","","string","","","","" +"platform","The specific version (manufacturer, model, etc.) of a technology that is used to carry out a laboratory or computational experiment.","Cytek Aurora, Cytek Aurora Evo, BD FACSDiscover S8, BD FACSymphony S6, BD FACSMelody, BD FACSAria III, BD FACSCanto, BD FACSLyric Clinical, BD FACSCanto II, BD FACSDiscover A8, Thermo Fisher Attune Xenith, Thermo Fisher Attune CytPix, Thermo Fisher Attune NxT, Sony MA900, BD LSRFortessa, unknown","","TRUE","","","","string","","","","" +"AMP RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","","","" +"AMP AIM","Accelerating Medicines Partnership Autoimmune and Immune-Mediated Diseases. 
Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","","","" +"Component","A high-level attribute for grouping attributes into templates.","","","True","","","","string","","","","" +"program","Name of the funding program that supported the generation of data and associated files","AMP AIM, AMP RA/SLE, Community Contribution","","True","","","","string","","","","" +"project","A sub-level attribute of `program` specifying a research initiative working to investigate particular hypotheses.","AIM for RA, ELLIPSS, LOCKIT, RA, SLE, STAMP, UMass V-CoRT","","True","","","list like error","string_list","","","","" +"programPhase","A label noting which AMP RA/SLE program phase generated the data.","I, II","","True","","","list like error","string_list","","","","" +"dataType","High-level classification of the type of data contained in the file, loosely related to the experimental method or biological entity that is being profiled. Select all that apply using a comma-delimited list, though in most cases only a single label is expected. For multimodal datasets with concomitant profiling of biospecimen include 'multimodal'.","cytometry, epigenomics, genomics, histology, immune repertoire profiling, immunostaining, lipidomics, metabolomics, microbiome, multimodal, proteomics, transcriptomics","","True","","","list like error","string_list","","","","" +"dataSubtype","General classification to differentiate between omics profiling modalities. If N/A please select 'none'. Multiple selections can be provided in a comma-delimited list, however this is largely only expected in the context of Datasets and files that contain integrated experimental data spanning multiple types.","bulk, none, pseudobulk, single-cell, single-nucleus, spatial","","True","","","list like error","string_list","","","","" +"dataLevel","Level of data processing applied to file. 
Levels refer to pre-defined standards of processing for the given assay.","1, 2, 3, 4, 5","","True","","","","string","","","","" +"10xProbeSetReference","Name of probe set used in 10x Chromium Flex, Xenium, or Visium data. If custom modified probe set was used the probe reference should be included as metadata accompanying the experimental data files.","Flex Human Transcriptome Probe Set v1.0.1, Flex Human Transcriptome Probe Set v1.1.0, Visium Human Transcriptome v1, Visium Human Transcriptome v2, custom probe set","","True","","","","string","","","","" +"specimenModality","Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens","multispecimen, single specimen, unknown","","True","","","","string","","","","" +"individualID","Unique identifier assigned to each study participant. For multi-specimen data files provide all IDs in a comma-separated list.","","","True","","","#ClinicalMetadataTemplate unique error^^#BiospecimenMetadataTemplate str^^list like error","string_list","","","","" +"biospecimenID","A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.","","","True","","","#BiospecimenMetadataTemplate unique error^^list like error","string_list","","","","" +"parentBiospecimenID","The biospecimenID associated with the originating biospecimen for derived or child biospecimens.","","","False","","","","string","","","","" +"biospecimenType","A label indicating the biological material collected for experimentation and data collection. 
Where applicable, provide all types in a comma-separated list.","PBMCs, cell line, fibroblast-like synoviocyte, kidney biopsy, none, plasma, primary cell culture, saliva, salivary gland, serum, skin biopsy, skin swab, stool, suction blister cells, suction blister fluid, synovial fluid, synovial tissue, total leukocytes, urine, uvea, whole blood","","True","","","#BiospecimenMetadataTemplate str^^#InVitroBiospecimenMetadataTemplate^^list like error","string_list","","","","" +"primaryCellSource","A label indicating the biological source material from which a primary cell culture was derived.","PBMCs, kidney, pannus-derived dermis, pannus-derived epidermis, salivary gland, synovial tissue, total leukocytes, urine, uvea, whole blood","","True","","","","string","","","","" +"metadataType","A label further classifying the content of metadata resource.","assay, biospecimen, cell coordinates, data dictionary, file manifest, medication, other, phenotype, protocol, single-cell metadata, target panel, template, tissue microarray map, tissue multiarray map, user manual","","True","","","","string","","","","" +"codingLanguage","The coding, aka programming, language(s) contained in the file marked with `resourceType = code`. 
Select all that apply.","C, C#, C++, Fortan, Java, Julia, Matlab, Python, R, Ruby, SAS","","True","","","","string","","","","" +"visitID","Ordinal ID distinguishing different patient visits.","","","True","","","","string","","","","" +"softwareAndVersion","Relevant software and version used to generate the data file.","BD FACSDiva 8.0.1, Cell Ranger 9.0.1, Cell Ranger ATAC v1.1.0, Cell Ranger v3.0.0, Cell Ranger v3.0.1, Cell Ranger v3.0.2, Cell Ranger v3.1.0, Cell Ranger v4.0.0, Cell Ranger v5.0.0, Cell Ranger v5.0.1, Cell Ranger v6.0.0, Cell Ranger v6.0.1, Cell Ranger v6.0.2, Cell Ranger v6.1.0, Cell Ranger v6.1.1, Cell Ranger v6.1.2, Cell Ranger v7.0.0, Cell Ranger v7.0.1, Cell Ranger v7.1.0, Cell Ranger v7.2.0, Cell Ranger v8.0.0, Cell Ranger v8.0.1, Cell Ranger v9.0.0, Space Ranger 3.0.0, Space Ranger 3.0.1, Space Ranger 3.1.0, Space Ranger 3.1.1, Space Ranger 3.1.2, Space Ranger 3.1.3, demuxlet","","False","","","","string","","","","" +"targetPanel","A unique or established human-readable name assigned to the panel of targets profiled in the experiment. For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.","","","True","","","","string","","","","" +"targetPanelSize","The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).","","","False","","","int error","integer","","","","" +"nucleicAcidSource","The source of the nucleic acid used as input for sequencing library fragments. 
Select all that apply, though in most cases only a single label is expected.","BCR mRNA, CRISPR protospacer feature barcode, TCR mRNA, Tn5-accessible gDNA, antigen capture barcode, gDNA, globin-depleted RNA, intracellular protein feature barcode, multiplexing oligo, poly(A) RNA, rRNA-depleted RNA, surface protein feature barcode","","True","","","list like error","string_list","","","","" +"biospecimenSubtype","Biospecimen status before sample is processed into a scRNA-seq library. Several scRNA-seq technologies support a variety of sample processing methods which can introduce sources of technical variation.","FFPE tissue, PFA-fixed tissue, cell or tissue lysate, cell suspension, flow-sorted cells, fresh tissue, frozen tissue, nuclei suspension, supernatant","","False","","","","string","","","","" +"cellRangerOutput","10x Genomics Cell Ranger software outputs several different count results and formats, some with different processing applied. This label distinguishes between these types and is particularly helpful when multiple files are uploaded with the same name, e.g., barcodes.tsv.gz","Not Applicable, filtered MEX, filtered_feature_bc_matrix, filtered_peak_bc_matrix, raw MEX, raw_feature_bc_matrix, raw_peak_bc_matrix","","True","","","","string","","","","" +"alignmentReference","The genomic/transcriptomic reference used for performing read alignment against.","10x Cell Ranger Human GRCh38 2020-A, 10x Cell Ranger Human GRCh38 2024-A, GRCh38, modified GRCh38, unknown, vdj_GRCh38_alts_ensembl-4.0.0","","True","","","","string","","","","" +"libraryPrepMethod","Sequencing library preparation method or kit used to create the library.
If no commercially available kit was used, please select 'in-house library prep'.","10x Chromium Fixed RNA Human Transcriptome, 10x Chromium GEM-X Single Cell 3' v4, 10x Chromium GEM-X Single Cell 5' v3, 10x Chromium Next GEM Single Cell 3', 10x Chromium Next GEM Single Cell 3' 3.1, 10x Chromium Next GEM Single Cell 5' v1.1, 10x Chromium Next GEM Single Cell 5' v2, 10x Chromium Next GEM Single Cell ATAC v2, 10x Chromium Single Cell Human BCR, 10x Chromium Single Cell Human TCR, 10x GEM-X Flex Gene Expression Human, 10x GEM-X Universal 5' Gene Expression v3, CEL-Seq2, Chromium Next GEM Single Cell ATAC v1.1, Fluidigm C1 HT, NEBNext Human Immune Sequencing Kit, NEBNext Ultra II Directional RNA Library, Nextera XT, Nextera XT DNA, QIAseq miRNA Library, SMART-Seq Human BCR with UMI, SMART-Seq Human TCR with UMI, SMART-Seq v4 Ultra Low Input RNA, SMARTer Stranded Total RNA v2, Takara Human BCR profiling for Illumina, Takara Human TCR profiling for Illumina, Takara Human TCRv2 profiling for Illumina, Takara Human scTCR profiling for Illumina, TruSeq Stranded mRNA, custom DASH-treatment, in-house library prep","","True","","","","string","","","","" +"datasetType","High-level classification of dataset entity distinguishing between datasets compiled for a specific publication or as a general data resource.","experimental, publication","","True","","","","string","","","","" +"acknowledgmentStatement","A Dataset-specific attribute specifying the path to the wiki subpage within the ARK Portal - backend project that contains the acknowledgement statement that must be included in publications using data from the given dataset as a stipulation of the conditions of use.","syn26710600/wiki/619685","","True","","","","string","","","","" +"ARKRelease","A Dataset-specific attribute specifying the ARK Portal release in which this dataset was first made available to the public.","1.0, 2.0, 2024.06.R1, 2024.07.R1, 2024.08.R1, 2024.09.R1, 2024.10.R1, 2024.12.R1, 2025.01.R1, 2025.02.R1, 
2025.03.R1, 2025.04.R1, 2025.05.R1, 2025.06.R1, 2025.07.R1, 2025.08.R1, 2025.09.R1, 2025.10.R1, 2025.11.R1, 2025.12.R1","","True","","","","string","","","","" +"RObjectClass","Rds files store R objects, one per file. This label details the class of the R object saved to the Rds file or other similar file types.","ROCR prediction.object, Seurat object, SummarizedExperiment, Symphony reference, data.frame, list, matrix, sparse matrix, vector","","False","","","","string","","","","" +"diagnosis","A high-level classifier indicating the disease status of an individual.","At-Risk RA, Not Applicable, OA, RA, SLE, Sjogren's disease, control, cutaneous lupus erythematosus, dermatomyositis, discoid lupus erythematosus, lupus nephritis, psoriasis, psoriatic arthritis, scleroderma, unknown, vitiligo","","True","","","list like error","string_list","","","","" +"libraryID","A library label or name, unique within an experiment, used to distinguish sequencing libraries.","","","True","","","","string","","","","" +"associatedCodeURL","A URL to the repository where associated code is available.","","","False","","","","string","uri","","","" +"FMAID","Functional Model of Anatomy ontology ID corresponding to the anatomical site and origin of the biospecimen. This attribute is assigned by ARK Portal data managers.","","","True","","https://bioportal.bioontology.org/ontologies/FMA/?p=summary","int error","integer","","","","" +"skinSiteStatus","Disease manifestation status of skin biospecimen.","healthy control, lesional, lesional proximal, non-lesional","","True","","","","string","","","","" +"salivaCollectionProcedure","Classification of saliva collection procedure. 
This is a conditionally dependent attribute triggered for saliva `biospecimenType`.","stimulated, unstimulated","","True","","","","string","","","","" +"synovialCollectionProcedure","Classification of procedure for synovial tissue collection.","arthroplasty, biopsy, synovectomy, unknown","","True","","","","string","","","","" +"publicationType","General classification of publication.","correction, peer-reviewed, pre-print","","True","","","","string","","","","" +"title","Title of the publication.","","","True","","","","string","","","","" +"journal","Journal in which the publication was released","","","True","","","","string","","","","" +"publicationDate","The publication date extracted from PubMed database","","","True","","","","string","","","","" +"DOI","Digital object identifier","","","True","","","","string","uri-reference","","","" +"anatomicalSite","The anatomical site, i.e., location on or within the body, from which the biospecimen was collected. This attribute is required depending on the value selected for biospecimenType.","left 2nd MCP joint, left ankle joint, left hip joint, left knee joint, left wrist joint, other site, right 1st MTP joint, right 2nd MCP joint, right 2nd MTP joint, right 3rd MCP joint, right ankle joint, right hip joint, right knee joint, right wrist joint, unknown","","True","","","","string","","","","" +"vitiligoPattern","A high-level classification of vitiligo based on the distribution and patterning of lesions across the body.","mixed, non-segmental, segmental, unclassified","","False","","","","string","","","","" +"vitiligoPhenotype","Classification of vitiligo lesions which correlate with autoimmune activity and result in specific skin and depigmentation manifestations at the lesion site. If N/A please select 'none'. 
Multiple selections can be provided in a comma-delimited list.","active, confetti, inflammatory, none, trichrome","","True","","","list like error","string_list","","","","" +"psoriasisType","General type classification of psoriasis disease manifestation.","erythrodermic, guttate, inverse, plaque, pustular","","False","","","","string","","","","" +"ageDiagnosis","Age at which subject was diagnosed with `diagnosis`. If providing this value be sure the unit matches that used for `age`.","","","False","","","num error","number","","","","" +"age","Age at which subject was enrolled in study or age at corresponding visit and data collection event. If value unknown, enter '-1'.","","","True","","","num error","number","","","","" +"ageUnits","The unit of measure used for `ageEnrollment` and `ageDiagnosis`","months, years","","True","","","","string","","","","" +"sex","A textual description of a person's sex at birth.","female, intersex, male, unknown","","True","","","","string","","","","" +"height","Standing height of subject.","","","True","","","num error","number","","","","" +"race","A textual description of a person's race.","American Indian or Alaska Native, Asian, Black or African American, Hispanic, Mixed Race, Native Hawaiian or Other Pacific Islander, White, other, unknown","","False","","","","string","","","","" +"ethnicity","The ethnicity of a person.","Hispanic or Latino, Not Hispanic or Latino, unknown","","False","","","","string","","","","" +"heightUnits","Unit of measure of value provided for `height`.","centimeters, feet, inches, meters","","True","","","","string","","","","" +"weight","Weight of subject. 
If value unknown, enter '-1'.","","","True","","","num error","number","","","","" +"weightUnits","Abbreviated unit of measure of value provided for `weight`.","g, kg, lb, oz","","True","","","","string","","","","" +"comorbidities","Any diseases or medical condition that is simultaneously present in addition to `diagnosis`.","Hashimoto's Thyroiditis, autoimmune thyroid disease, cardiovascular disease, diabetes, inflammatory bowel disease, multiple sclerosis, other, psoriasis, psoriatic arthritis, pulmonary disease, rheumatoid arthritis, systemic lupus erythematosus","","False","","","list like error","string_list","","","","" +"diabetesType","Type of diabetes mellitus.","gestational, type 1, type 2, unknown","","False","","","","string","","","","" +"VIDA","Vitiligo Disease Activity Score, a six-point scale that evaluates the activity of vitiligo","","","False","","","","string","","","","" +"VASI","Total body Vitiligo Area Severity Index, a measure of the percentage of vitiligo involvement across the body calculated in terms of hand units.","","","False","","","","string","","","","" +"VETI","Vitiligo Extent Tensity Index measures the extent of vitiligo by a numerical score and combines analysis of extensity and severity of vitiligo to produce a constant and reproducible number.","","","False","","https://doi.org/10.5826/dpc.0404a18","num error","number","","","","" +"PASI","Psoriasis Area and Severity Index, is a measurement of the discoloration, thickness, scaling, and coverage of psoriasis plaques.","","","False","","","","string","","","","" +"CDASI","Cutaneous Dermatomyositis Disease Area and Severity Index, is a measurement used to characterize cutaneous dermatomyositis severity.","","","False","","","","string","","","","" +"inputCellCount","An estimate of the number of cells expected to be sequenced in a library. 
Software that process single-cell sequencing data often include options for users to specify this value to improve processing results.","","","True","","","int error","integer","","","","" +"totalReads","Total number of reads sequenced from the library.","","","True","","","int error","integer","","","","" +"FACSPopulation","A description of the marker gating strategy used to derive the population cells with FACS.","","","True","","","","string","","","","" +"cellType","The cell type name from Cell Ontology for the corresponding CL identifier.","","","False","","","","string","","","","" +"userDefinedCellType","User-defined label of a cell type. Contributors are provided this option to use preferred or custom cell type labels that may vary from options and standards set by Cell Ontology.","","","False","","","","string","","","","" +"krennInflammatory","A standardized, semi-quantitative measure of the degree of inflammatory infiltrate in synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"krennLining","A standardized, semi-quantitative measure of the degree of hyperplasia/enlargement of the synovial lining layer of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"krennStroma","A standardized, semi-quantitative measure of stromal cell density of synovial specimen. 
This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"krennSynovitisScore","The Krenn Synovitis Score (KSS) is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"datasetStatus","A categorical label indicating the status of an ARK Portal dataset. This is applied to improve downstream management of datasets as well as various ETL workflows.","deprecated, released, test, under peer review, unreleased","","True","","","","string","","","","" +"sampleProcessingBatch","A label indicating batching of sample processing or preparation that occurs prior to data collection.","","","False","","","","string","","","","" +"dataCollectionBatch","A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.","","","False","","","","string","","","","" +"metadataStandards","Metadata standards used to generate the metadata","ARK data model, user-defined","","False","","","","string","","","","" +"skinSunExposure","For skin-based biospecimen, this attribute indicates whether the sample was collected from an anatomical site that does or does not receive routine sun exposure.","not sun exposed, sun exposed","","False","","","","string","","","","" +"sequencingSpikeIn","Pre-made sequencing libraries may be added to your sequencing run to improve sequencing results. 
If such a 'spike-in' library was used, this attribute specifies the name of the library and the percentage of the spike-in library used in the sequencing run.","","","False","","","","string","","","","" +"librarySpikeIn","Pre-made collections of nucleic acid fragments can be added to samples during the library construction process to improve downstream quantification and statistical analyses. If such a 'spike-in' was used, this attribute specifies the name and manufacturer of the spike-in.","","","False","","","","string","","","","" +"readLength","The number of base pairs (bp) sequenced for reads in a fastq file.","","","False","","","int error","integer","","","","" +"plateID","An identifier assigned to a multi-well plate. Certain data types and assays profile samples using multi-well plates. Knowing which samples were profiled on each plate is important for establishing sample provenance, finding the right data files for a specific set of samples, as well as downstream exploratory data analysis and QC work particularly regarding identification and correction of batch effects.","","","False","","","","string","","","","" +"processedDataType","A label used for file annotations to provide a brief description of the processed data file.","barcode counts, differential expression results, epigenomic peaks, gene counts","","False","","","list like error","string_list","","","","" +"eventCount","The total number of events detected and captured in the corresponding FCS file. In cytometry, an event corresponds to particles detected and measured by the instrument. This number corresponds to the number of rows in the FCS file and is expected to be a whole integer number.","","","False","","","int error","integer","","","","" +"notes","Please use this attribute to capture pertinent details not otherwise captured.
This is an unstructured text entry, please be concise.","","","False","","","","string","","","","" +"species","The genus species of sample or subject origin.","Homo sapiens","","True","","","","string","","","","" +"treatment","A short descriptive label indicating what experimental treatments have been applied to a biospecimen before data capture. Please include the word 'control' to indicate specific treatments that are intended to serve as a control group.","","","True","","","","string","","","","" +"treatmentTimepoint","Where applicable, specify the timepoint relative to treatment to distinguish different sets of samples that have undergone the same treatment but for different lengths of time. REQUIRED: please specify the unit of time, e.g., secs, mins, hrs, days, etc.","","","False","","","","string","","","","" +"slideID","A distinct label or name, unique within an experiment, assigned to an imaging slide.","","","True","","","","string","","","","" +"altSampleID","An alternate identifier for a sample. With some assays there can be default or alternate sample identifiers entered into the data collection software. If you will be uploading data collected with an alternate identifier please provide that here. In some cases, the ARK BDM team will use this field to capture original but outdated identifiers for samples.","","","False","","","","string","","","","" +"sampleCollectionBatch","A label indicating batching of sample collection or experiment execution that occurs prior to data collection.","","","False","","","","string","","","","" +"associatedAccession","This is a File and Dataset annotation attribute indicating additional accessions (i.e., unique identifiers) associated with the data when the data has also been submitted to or can be found in other repositories such as GEO, SRA, dbGaP, etc.","","","False","","","list like error","string_list","","","","" +"percentCellViability","A measure of the proportion of viable cells within a cell suspension.
Scale is 0-100.","","","True","","","inRange 50 100 error","integer","","50","100","" +"sequencingSaturation","A measure of the fraction of library complexity that was sequenced in a library. This metric quantifies the fraction of reads originating from an already-observed UMI. More specifically, this is the fraction of confidently mapped, valid cell-barcode, valid UMI reads that are non-unique. Scale is 0-1.","","","False","","https://kb.10xgenomics.com/hc/en-us/articles/115005062366-What-is-sequencing-saturation","inRange 0 1 error","number","","0","1","" +"publicationSynID","The synID of the corresponding Synapse entity that stores metadata about the publication. This is used to differentiate publication-specific files, often consisting of level 4 processed data and expanded subject metadata, in a publication dataset that also includes raw or minimally processed files from experimental datasets. This provides an easy way to distinguish and select for the publication-specific data from which the research findings were derived. When this attribute is used to annotate a Dataset it serves as a way to directly link the Dataset entity with the publication metadata stored in Synapse.","","","False","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"associatedDataset","The synID of a Dataset entity. This serves to link other Synapse entities to Dataset entities. When used to annotate a publication Dataset this attribute should include the synID for an experimental Datasets from which the publication data was derived. Multiple synID can be specified using a comma-delimited list.","","","False","","","list like::regex search ^syn[0-9]{8} error","string_list","","","","^syn[0-9]{8}" +"custom10xProbeSetSynID","If custom modified probe sets were used for collecting 10x Chromium Flex scRNA-seq data, then the probe reference files should be grouped as a zip or tar archive and uploaded as metadata. 
This attribute links the experimental data back to the probe set archive file via a synapse ID.","","","True","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"targetPanelSynID","In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. This attribute links experimental data files to the target panel metadata via the synapse ID of that file.","","","True","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"datasetDescription","A Dataset-specific attribute specifying the synID of the folder that contains a wiki write-up of the dataset description. This wiki content will be surfaced on the ARK Portal frontend site.","","","True","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"DOID","Disease ontology identifier associated with `diagnosis`. Attribute values are applied by ARK Portal data managers.","","","True","","","regex search ^DOID error","string","","","","^DOID" +"BRENDA","BRENDA Tissue and Enzyme Source Ontology ID (BTO) corresponding to the `biospecimenType`. 
Attribute values are applied by ARK Portal data managers.","","","True","","https://bioportal.bioontology.org/ontologies/BTO","regex search ^BTO error","string","","","","^BTO" +"year","Year (YYYY) in which the paper was published.","","","True","","","regex search [1-2][0-9]{3} error","string","","","","[1-2][0-9]{3}" +"PMID","PubMed(R) Identifier","","","True","","","regex search ^PMID error","string","","","","^PMID" +"ImmPortAccession","Accession to corresponding information in ImmPort.","","","False","","","regex search ^SDY error","string","","","","^SDY" +"cellOntologyID","Cell Ontology CL identifier that best describes a biospecimen used to generate data, e.g., CL:0000084 for T cell.","","","False","","","regex search ^CL: error","string","","","","^CL:" +"PMCID","PubMed Central Identifier, formatted as a compact URI string that will automatically resolve into a URL based on the Synapse bioregistry; e.g., pmc:PMCxxxxxxxx","","","False","","","regex search ^pmc:PMC[0-9]{8} error","string","","","","^pmc:PMC[0-9]{8}" diff --git a/model_contexts/main/ark.main_context.csv b/model_contexts/main/ark.main_context.csv index f29776d1..6573b34d 100644 --- a/model_contexts/main/ark.main_context.csv +++ b/model_contexts/main/ark.main_context.csv @@ -1,14 +1,14 @@ -"Attribute","Description","Valid Values","DependsOn","Properties","Required","Parent","DependsOn Component","Source","Validation Rules","columnType" -"Dataset Annotation Template","A template outlining dataset metadata to use as annotations for a synapse dataset entity.","","Component, program, project, datasetType, biospecimenType, biospecimenSubtype, diagnosis, acknowledgmentStatement, datasetDescription, ARKRelease, dataType, dataSubtype, assay, publicationSynID, associatedDataset, associatedCodeURL, associatedAccession, ImmPortAccession, datasetStatus, species","","","","","","","" -"Publication Metadata Template","A template outlining metadata to use as annotations for Publication ‘file’
entities.","","Component, program, project, associatedDataset, PMID, PMCID, DOI, journal, year, title, publicationType, publicationDate","","","","","","","" -"scRNASeq Assay Metadata Template","A template outlining metadata to be collected for each library in a scRNA-seq dataset.","","Component, assay, specimenModality, libraryPrepMethod, nucleicAcidSource, totalReads, percentCellViability, platform, sequencingSaturation, sampleProcessingBatch, dataCollectionBatch, inputCellCount, alignmentReference, softwareAndVersion","","","","","","","" -"Bulk RNA-seq Assay Metadata Template","A template outlining metadata to be collected for each library in a bulk RNA-seq dataset.","","Component, assay, specimenModality, libraryPrepMethod, nucleicAcidSource, totalReads, platform, sampleProcessingBatch, dataCollectionBatch, alignmentReference, softwareAndVersion","","","","","","","" -"BDM Metadata File Annotations","A template outlining metadata to be collected for annotating metadata files to be compiled by ARK BDM.","","Component, fileFormat, dataType, resourceType, program, project, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","" -"snRNASeq Assay Metadata Template","A template outlining metadata to be collected for each library in a snRNA-seq dataset.","","Component, assay, specimenModality, libraryPrepMethod, nucleicAcidSource, totalReads, percentCellViability, platform, sequencingSaturation, sampleProcessingBatch, dataCollectionBatch, inputCellCount, alignmentReference, softwareAndVersion","","","","","","","" -"Bulk ATAC-seq Assay Metadata Template","A template outlining metadata to be collected for each library in a bulk ATAC-seq dataset.","","Component, assay, specimenModality, libraryPrepMethod, nucleicAcidSource, totalReads, platform, sampleProcessingBatch, dataCollectionBatch, alignmentReference, softwareAndVersion","","","","","","","" -"snATAC-seq Assay Metadata Template","A template outlining metadata to be collected for each 
library in a snATAC-seq dataset.","","Component, assay, specimenModality, libraryPrepMethod, nucleicAcidSource, totalReads, percentCellViability, platform, sequencingSaturation, sampleProcessingBatch, dataCollectionBatch, inputCellCount, alignmentReference, softwareAndVersion","","","","","","","" -"CyTOF Assay Metadata Template","A template outlining assay-related metadata for a cytometry time-of-flight (CyTOF) dataset. Each row corresponds to a biospecimen profiled in the experiment.","","Component, assay, biospecimenID, sampleProcessingBatch, dataCollectionBatch, platform, softwareAndVersion, targetPanel, targetPanelSynID, targetPanelSize","","","","","","","" -"metadata","Any file that contains curated data describing an experiment and experimental-derived data, including metadata about study subjects, biospecimens, protocols, assay reagents, marker panels, and ID mappings. Is a valid value of Resource Type that triggers conditional dependencies for additional attributes.","","metadataType, metadataStandards","","","","","","","" -"10x GEM-X Flex Gene Expression Human","A scRNA-seq method that analyzes gene expression using pre-designed probes. 
Is a valid value of `libraryPrepMethod` and when selected triggers conditional dependencies for additional metadata collection.","","10xProbeSetReference","","","","","","","" -"multispecimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","libraryID","","","","","","","" -"single specimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","biospecimenID","","","","","","","" +"Attribute","Description","Valid Values","DependsOn","Required","Parent","Source","Validation Rules","columnType","Format","Minimum","Maximum","Pattern" +"Dataset Annotation Template","A template outlining dataset metadata to use as annotations for a synapse dataset entity.","","Component, program, project, datasetType, biospecimenType, biospecimenSubtype, diagnosis, acknowledgmentStatement, datasetDescription, ARKRelease, dataType, dataSubtype, assay, publicationSynID, associatedDataset, associatedCodeURL, associatedAccession, ImmPortAccession, datasetStatus, species","","","","","","","","","" +"Publication Metadata Template","A template outlining metadata to use as annotations for Publication ‘file’ entities.","","Component, program, project, associatedDataset, PMID, PMCID, DOI, journal, year, title, publicationType, publicationDate","","","","","","","","","" +"scRNASeq Assay Metadata Template","A template outlining metadata to be collected for each library in a scRNA-seq dataset.","","Component, assay, specimenModality, libraryPrepMethod, nucleicAcidSource, totalReads, percentCellViability, platform, sequencingSaturation, sampleProcessingBatch, dataCollectionBatch, inputCellCount, alignmentReference, softwareAndVersion","","","","","","","","","" +"Bulk RNA-seq Assay Metadata Template","A template outlining metadata to be collected for each library in a bulk RNA-seq dataset.","","Component, assay, specimenModality, libraryPrepMethod, nucleicAcidSource, totalReads, 
platform, sampleProcessingBatch, dataCollectionBatch, alignmentReference, softwareAndVersion","","","","","","","","","" +"BDM Metadata File Annotations","A template outlining metadata to be collected for annotating metadata files to be compiled by ARK BDM.","","Component, fileFormat, dataType, resourceType, program, project, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","","","" +"snRNASeq Assay Metadata Template","A template outlining metadata to be collected for each library in a snRNA-seq dataset.","","Component, assay, specimenModality, libraryPrepMethod, nucleicAcidSource, totalReads, percentCellViability, platform, sequencingSaturation, sampleProcessingBatch, dataCollectionBatch, inputCellCount, alignmentReference, softwareAndVersion","","","","","","","","","" +"Bulk ATAC-seq Assay Metadata Template","A template outlining metadata to be collected for each library in a bulk ATAC-seq dataset.","","Component, assay, specimenModality, libraryPrepMethod, nucleicAcidSource, totalReads, platform, sampleProcessingBatch, dataCollectionBatch, alignmentReference, softwareAndVersion","","","","","","","","","" +"snATAC-seq Assay Metadata Template","A template outlining metadata to be collected for each library in a snATAC-seq dataset.","","Component, assay, specimenModality, libraryPrepMethod, nucleicAcidSource, totalReads, percentCellViability, platform, sequencingSaturation, sampleProcessingBatch, dataCollectionBatch, inputCellCount, alignmentReference, softwareAndVersion","","","","","","","","","" +"CyTOF Assay Metadata Template","A template outlining assay-related metadata for a cytometry time-of-flight (CyTOF) dataset. 
Each row corresponds to a biospecimen profiled in the experiment.","","Component, assay, biospecimenID, sampleProcessingBatch, dataCollectionBatch, platform, softwareAndVersion, targetPanel, targetPanelSynID, targetPanelSize","","","","","","","","","" +"metadata","Any file that contains curated data describing an experiment and experimental-derived data, including metadata about study subjects, biospecimens, protocols, assay reagents, marker panels, and ID mappings. Is a valid value of Resource Type that triggers conditional dependencies for additional attributes.","","metadataType, metadataStandards","","","","","","","","","" +"10x GEM-X Flex Gene Expression Human","A scRNA-seq method that analyzes gene expression using pre-designed probes. Is a valid value of `libraryPrepMethod` and when selected triggers conditional dependencies for additional metadata collection.","","10xProbeSetReference","","","","","","","","","" +"multispecimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","libraryID","","","","","","","","","" +"single specimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","biospecimenID","","","","","","","","","" diff --git a/model_contexts/main/ark.main_model.csv b/model_contexts/main/ark.main_model.csv index 59428da3..7f67f9dc 100644 --- a/model_contexts/main/ark.main_model.csv +++ b/model_contexts/main/ark.main_model.csv @@ -1,123 +1,123 @@ -"Attribute","Description","Valid Values","DependsOn","Properties","Required","Parent","DependsOn Component","Source","Validation Rules","columnType" -"Dataset Annotation Template","A template outlining dataset metadata to use as annotations for a synapse dataset entity.","","Component, program, project, datasetType, biospecimenType, biospecimenSubtype, diagnosis, acknowledgmentStatement, datasetDescription, ARKRelease, dataType, dataSubtype, assay, publicationSynID, associatedDataset, 
associatedCodeURL, associatedAccession, ImmPortAccession, datasetStatus, species","","","","","","","" -"Publication Metadata Template","A template outlining metadata to use as annotations for Publication ‘file’ entities.","","Component, program, project, associatedDataset, PMID, PMCID, DOI, journal, year, title, publicationType, publicationDate","","","","","","","" -"scRNASeq Assay Metadata Template","A template outlining metadata to be collected for each library in a scRNA-seq dataset.","","Component, assay, specimenModality, libraryPrepMethod, nucleicAcidSource, totalReads, percentCellViability, platform, sequencingSaturation, sampleProcessingBatch, dataCollectionBatch, inputCellCount, alignmentReference, softwareAndVersion","","","","","","","" -"Bulk RNA-seq Assay Metadata Template","A template outlining metadata to be collected for each library in a bulk RNA-seq dataset.","","Component, assay, specimenModality, libraryPrepMethod, nucleicAcidSource, totalReads, platform, sampleProcessingBatch, dataCollectionBatch, alignmentReference, softwareAndVersion","","","","","","","" -"BDM Metadata File Annotations","A template outlining metadata to be collected for annotating metadata files to be compiled by ARK BDM.","","Component, fileFormat, dataType, resourceType, program, project, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","" -"snRNASeq Assay Metadata Template","A template outlining metadata to be collected for each library in a snRNA-seq dataset.","","Component, assay, specimenModality, libraryPrepMethod, nucleicAcidSource, totalReads, percentCellViability, platform, sequencingSaturation, sampleProcessingBatch, dataCollectionBatch, inputCellCount, alignmentReference, softwareAndVersion","","","","","","","" -"Bulk ATAC-seq Assay Metadata Template","A template outlining metadata to be collected for each library in a bulk ATAC-seq dataset.","","Component, assay, specimenModality, libraryPrepMethod, nucleicAcidSource, totalReads, 
platform, sampleProcessingBatch, dataCollectionBatch, alignmentReference, softwareAndVersion","","","","","","","" -"snATAC-seq Assay Metadata Template","A template outlining metadata to be collected for each library in a snATAC-seq dataset.","","Component, assay, specimenModality, libraryPrepMethod, nucleicAcidSource, totalReads, percentCellViability, platform, sequencingSaturation, sampleProcessingBatch, dataCollectionBatch, inputCellCount, alignmentReference, softwareAndVersion","","","","","","","" -"CyTOF Assay Metadata Template","A template outlining assay-related metadata for a cytometry time-of-flight (CyTOF) dataset. Each row corresponds to a biospecimen profiled in the experiment.","","Component, assay, biospecimenID, sampleProcessingBatch, dataCollectionBatch, platform, softwareAndVersion, targetPanel, targetPanelSynID, targetPanelSize","","","","","","","" -"metadata","Any file that contains curated data describing an experiment and experimental-derived data, including metadata about study subjects, biospecimens, protocols, assay reagents, marker panels, and ID mappings. Is a valid value of Resource Type that triggers conditional dependencies for additional attributes.","","metadataType, metadataStandards","","","","","","","" -"10x GEM-X Flex Gene Expression Human","A scRNA-seq method that analyzes gene expression using pre-designed probes. 
Is a valid value of `libraryPrepMethod` and when selected triggers conditional dependencies for additional metadata collection.","","10xProbeSetReference","","","","","","","" -"multispecimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","libraryID","","","","","","","" -"single specimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","biospecimenID","","","","","","","" -"Component","A high-level attribute for grouping attributes into templates.","","","","True","","","","","string" -"fileFormat","Standard file format name or file extension","bai, bam, bed, bim, csv, czi, docx, dose, erate, fam, fastq, fcs, geojson, h5, h5ad, info, mcd, mtx, parquet, pdf, py, rds, rec, svs, tbi, tgz, tsv, txt, vcf, xls, xlsx, zip","","","True","","","","","string" -"publicationSynID","The synID of the corresponding Synapse entity that stores metadata about the publication. This is used to differentiate publication-specific files, often consisting of level 4 processed data and expanded subject metadata, in a publication dataset that also includes raw or minimally processed files from experimental datasets. This provides an easy way to distinguish and select for the publication-specific data from which the research findings were derived. When this attribute is used to annotate a Dataset it serves as a way to directly link the Dataset entity with the publication metadata stored in Synapse.","","","","False","","","","regex search ^syn[0-9]{8} error","string" -"associatedDataset","The synID of a Dataset entity. This serves to link other Synapse entities to Dataset entities. When used to annotate a publication Dataset this attribute should include the synID for an experimental Datasets from which the publication data was derived. 
Multiple synID can be specified using a comma-delimited list.","","","","False","","","","list like::regex search ^syn[0-9]{8} error","string" -"program","Name of the funding program that supported the generation of data and associated files","AMP AIM, AMP RA/SLE, Community Contribution","","","True","","","","","string" -"project","A sub-level attribute of `program` specifying a research initiative working to investigate particular hypotheses.","AIM for RA, ELLIPSS, LOCKIT, RA, SLE, STAMP, UMass V-CoRT","","","True","","","","list like error","string" -"programPhase","A label noting which AMP RA/SLE program phase generated the data.","I, II","","","True","","","","list like error","string" -"resourceType","High-level classification of the file content","code, experimental data, figure, metadata","","","True","","","","","string" -"dataType","High-level classification of the type of data contained in the file, loosely related to the experimental method or biological entity that is being profiled. Select all that apply using a comma-delimited list, though in most cases only a single label is expected. For multimodal datasets with concomitant profiling of biospecimen include 'multimodal'.","cytometry, epigenomics, genomics, histology, immune repertoire profiling, immunostaining, lipidomics, metabolomics, microbiome, multimodal, proteomics, transcriptomics","","","True","","","","list like error","string" -"dataSubtype","General classification to differentiate between omics profiling modalities. If N/A please select 'none'. Multiple selections can be provided in a comma-delimited list, however this is largely only expected in the context of Datasets and files that contain integrated experimental data spanning multiple types.","bulk, none, pseudobulk, single-cell, single-nucleus, spatial","","","True","","","","list like error","string" -"dataLevel","Level of data processing applied to file. 
Levels refer to pre-defined standards of processing for the given assay.","1, 2, 3, 4, 5","","","True","","","","","string" -"assay","The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.","ASAPSeq, CE-MS, CITESeq, CosMX, CyTOF, GenePS SeqFISH, H&E, LC-MS/MS, NULISA, Olink Explore HT, Olink Flex, Olink Focus, Olink Reveal, Olink Target 48, Olink Target 96, RNASeq, SNP array, SomaScan, VDJSeq, Visium, WES, WGS, Xenium, feature barcode sequencing, flow cytometry, imaging mass cytometry, imaging mass spectrometry, kiloplex, multiplexed ELISA, scRNASeq, scVDJSeq, serial IHC, snATACSeq, snRNASeq","","","True","","","","list like error","string" -"10xProbeSetReference","Name of probe set used in 10x Chromium Flex, Xenium, or Visium data. If custom modified probe set was used the probe reference should be included as metadata accompanying the experimental data files.","Flex Human Transcriptome Probe Set v1.0.1, Flex Human Transcriptome Probe Set v1.1.0, Visium Human Transcriptome v1, Visium Human Transcriptome v2, custom probe set","","","True","","","","","string" -"custom10xProbeSetSynID","If custom modified probe sets were used for collecting 10x Chromium Flex scRNA-seq data, then the probe reference files should be grouped as a zip or tar archive and uploaded as metadata. This attribute links the experimental data back to the probe set archive file via a synapse ID.","","","","True","","","","regex search ^syn[0-9]{8} error","string" -"platform","The specific version (manufacturer, model, etc.) of a technology that is used to carry out a laboratory or computational experiment. Specify where applicable for experimental data files, else enter 'none'. 
In most cases only a single label is expected, however multiple selections can be provided in comma-delimited list where applicable e.g., for 10x Genomics fastq files please specify both the 10x instrument and the sequencing platform.","BD FACSAria Fusion cell sorter, BD FACSAria III, BD FACSCanto, BD FACSCanto II, BD FACSDiscover A8, BD FACSDiscover S8, BD FACSLyric Clinical, BD FACSMelody, BD FACSymphony S6, BD LSRFortessa, Chromium Controller, Chromium GEM-X Single Cell 3' Chip v4, Chromium Next GEM Chip G, Chromium Next GEM Chip H, Chromium Next GEM Chip K, Chromium Next GEM Chip M, Chromium Next GEM Chip Q, Chromium X, Chromium Xo, Chromium iX, CyTOF XT, Cytek Aurora, Cytek Aurora Evo, Fluidigm BioMark, GEM-X Flex Gene Expression Chip, GEM-X OCM 5' Chip, Helios Mass Cytometer, Hyperion, Illumina HiSeq 2500, Illumina HiSeq X Ten, Illumina NextSeq 500, Illumina NovaSeq 6000, Illumina NovaSeq X, Not Applicable, Olink Signature Q100, Sony MA900, Thermo Fisher Attune CytPix, Thermo Fisher Attune NxT, Thermo Fisher Attune Xenith, Visium CytAssist, Xenium, none, unknown","","","True","","","","list like error","string" -"specimenModality","Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens","multispecimen, single specimen, unknown","","","True","","","","","string" -"individualID","Unique identifier assigned to each study participant. For multi-specimen data files provide all IDs in a comma-separated list.","","","","True","","","","#ClinicalMetadataTemplate unique error^^#BiospecimenMetadataTemplate str^^list like error","string" -"biospecimenID","A unique identifier assigned to specimens collected from study participants. 
For multi-specimen data files provide all IDs in a comma-separated list.","","","","True","","","","#BiospecimenMetadataTemplate unique error^^list like error","string" -"parentBiospecimenID","The biospecimenID associated with the originating biospecimen for derived or child biospecimens.","","","","False","","","","","string" -"biospecimenType","A label indicating the biological material collected for experimentation and data collection. Where applicable, provide all types in a comma-separated list.","PBMCs, cell line, fibroblast-like synoviocyte, kidney biopsy, none, plasma, primary cell culture, saliva, salivary gland, serum, skin biopsy, skin swab, stool, suction blister cells, suction blister fluid, synovial fluid, synovial tissue, total leukocytes, urine, uvea, whole blood","","","True","","","","#BiospecimenMetadataTemplate str^^#InVitroBiospecimenMetadataTemplate^^list like error","string" -"primaryCellSource","A label indicating the biological source material from which a primary cell culture was derived.","PBMCs, kidney, pannus-derived dermis, pannus-derived epidermis, salivary gland, synovial tissue, total leukocytes, urine, uvea, whole blood","","","True","","","","","string" -"metadataType","A label further classifying the content of metadata resource.","assay, biospecimen, cell coordinates, data dictionary, file manifest, medication, other, phenotype, protocol, single-cell metadata, target panel, template, tissue microarray map, tissue multiarray map, user manual","","","True","","","","","string" -"codingLanguage","The coding, aka programming, language(s) contained in the file marked with `resourceType = code`. 
Select all that apply.","C, C#, C++, Fortan, Java, Julia, Matlab, Python, R, Ruby, SAS","","","True","","","","","string" -"visitID","Ordinal ID distinguishing different patient visits.","","","","True","","","","","string" -"softwareAndVersion","Relevant software and version used to generate the data file.","BD FACSDiva 8.0.1, Cell Ranger 9.0.1, Cell Ranger ATAC v1.1.0, Cell Ranger v3.0.0, Cell Ranger v3.0.1, Cell Ranger v3.0.2, Cell Ranger v3.1.0, Cell Ranger v4.0.0, Cell Ranger v5.0.0, Cell Ranger v5.0.1, Cell Ranger v6.0.0, Cell Ranger v6.0.1, Cell Ranger v6.0.2, Cell Ranger v6.1.0, Cell Ranger v6.1.1, Cell Ranger v6.1.2, Cell Ranger v7.0.0, Cell Ranger v7.0.1, Cell Ranger v7.1.0, Cell Ranger v7.2.0, Cell Ranger v8.0.0, Cell Ranger v8.0.1, Cell Ranger v9.0.0, Space Ranger 3.0.0, Space Ranger 3.0.1, Space Ranger 3.1.0, Space Ranger 3.1.1, Space Ranger 3.1.2, Space Ranger 3.1.3, demuxlet","","","False","","","","","string" -"targetPanel","A unique or established human-readable name assigned to the panel of targets profiled in the experiment. For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.","","","","True","","","","","string" -"targetPanelSynID","In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. This attribute links experimental data files to the target panel metadata via the synapse ID of that file.","","","","True","","","","regex search ^syn[0-9]{8} error","string" -"targetPanelSize","The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. 
The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).","","","","False","","","","int error","integer" -"nucleicAcidSource","The source of the nucleic acid used as input for sequencing library fragments. Select all that apply, though in most cases only a single label is expected.","BCR mRNA, CRISPR protospacer feature barcode, TCR mRNA, Tn5-accessible gDNA, antigen capture barcode, gDNA, globin-depleted RNA, intracellular protein feature barcode, multiplexing oligo, poly(A) RNA, rRNA-depleted RNA, surface protein feature barcode","","","True","","","","list like error","string" -"biospecimenSubtype","Biospecimen status before sample is processed into a scRNA-seq library. Several scRNA-seq technologies support a variety of sample processing methods which can introduce sources of technical variation.","FFPE tissue, PFA-fixed tissue, cell or tissue lysate, cell suspension, flow-sorted cells, fresh tissue, frozen tissue, nuclei suspension, supernatant","","","False","","","","","string" -"cellRangerOutput","10x Genomics Cell Ranger software outputs several different count results and formats, some with different processing applied. This label distinguishes between these types and is particularly helpful when multiple files are uploaded with the sample name, e.g., barcodes.tsv.gz","Not Applicable, filtered MEX, filtered_feature_bc_matrix, filtered_peak_bc_matrix, raw MEX, raw_feature_bc_matrix, raw_peak_bc_matrix","","","True","","","","","string" -"alignmentReference","The genomic/transcriptomic reference used for performing read alignment against.","10x Cell Ranger Human GRCh38 2020-A, 10x Cell Ranger Human GRCh38 2024-A, GRCh38, modified GRCh38, unknown, vdj_GRCh38_alts_ensembl-4.0.0","","","True","","","","","string" -"libraryPrepMethod","Sequencing library preparation method or kit used to create the library.
If no commercially available kit was used, please select 'in-house library prep'.","10x Chromium Fixed RNA Human Transcriptome, 10x Chromium GEM-X Single Cell 3' v4, 10x Chromium GEM-X Single Cell 5' v3, 10x Chromium Next GEM Single Cell 3', 10x Chromium Next GEM Single Cell 3' 3.1, 10x Chromium Next GEM Single Cell 5' v1.1, 10x Chromium Next GEM Single Cell 5' v2, 10x Chromium Next GEM Single Cell ATAC v2, 10x Chromium Single Cell Human BCR, 10x Chromium Single Cell Human TCR, 10x GEM-X Flex Gene Expression Human, 10x GEM-X Universal 5' Gene Expression v3, CEL-Seq2, Chromium Next GEM Single Cell ATAC v1.1, Fluidigm C1 HT, NEBNext Human Immune Sequencing Kit, NEBNext Ultra II Directional RNA Library, Nextera XT, Nextera XT DNA, QIAseq miRNA Library, SMART-Seq Human BCR with UMI, SMART-Seq Human TCR with UMI, SMART-Seq v4 Ultra Low Input RNA, SMARTer Stranded Total RNA v2, Takara Human BCR profiling for Illumina, Takara Human TCR profiling for Illumina, Takara Human TCRv2 profiling for Illumina, Takara Human scTCR profiling for Illumina, TruSeq Stranded mRNA, custom DASH-treatment, in-house library prep","","","True","","","","","string" -"datasetType","High-level classification of dataset entity distinguishing between datasets compiled for a specific publication or as a general data resource.","experimental, publication","","","True","","","","","string" -"acknowledgmentStatement","A Dataset-specific attribute specifying the path to the wiki subpage within the ARK Portal - backend project that contains the acknowledgement statement that must be included in publications using data from the given dataset as a stipulation of the conditions of use.","syn26710600/wiki/619685","","","True","","","","","string" -"datasetDescription","A Dataset-specific attribute specifying the synID of the folder that contains a wiki write-up of the dataset description. 
This wiki content will be surfaced on the ARK Portal frontend site.","","","","True","","","","regex search ^syn[0-9]{8} error","string" -"ARKRelease","A Dataset-specific attribute specifying the ARK Portal release in which this dataset was first made available to the public.","1.0, 2.0, 2024.06.R1, 2024.07.R1, 2024.08.R1, 2024.09.R1, 2024.10.R1, 2024.12.R1, 2025.01.R1, 2025.02.R1, 2025.03.R1, 2025.04.R1, 2025.05.R1, 2025.06.R1, 2025.07.R1, 2025.08.R1, 2025.09.R1, 2025.10.R1, 2025.11.R1, 2025.12.R1","","","True","","","","","string" -"RObjectClass","Rds files store R objects, one per file. This label details the class of the R object saved to the Rds file or other similar file types.","ROCR prediction.object, Seurat object, SummarizedExperiment, Symphony reference, data.frame, list, matrix, sparse matrix, vector","","","False","","","","","string" -"diagnosis","A high-level classifier indicating the disease status of an individual.","At-Risk RA, Not Applicable, OA, RA, SLE, Sjogren's disease, control, cutaneous lupus erythematosus, dermatomyositis, discoid lupus erythematosus, lupus nephritis, psoriasis, psoriatic arthritis, scleroderma, unknown, vitiligo","","","True","","","","list like error","string" -"DOID","Disease ontology identifier associated with `diagnosis`. Attribute values are applied by ARK Portal data managers.","","","","True","","","","regex search ^DOID error","string" -"libraryID","A library label or name, unique within an experiment, used to distinguish sequencing libraries.","","","","True","","","","","string" -"associatedCodeURL","A URL to the repository where associated code is available.","","","","False","","","","","string" -"FMAID","Foundational Model of Anatomy ontology ID corresponding to the anatomical site and origin of the biospecimen.
This attribute is assigned by ARK Portal data managers.","","","","True","","","https://bioportal.bioontology.org/ontologies/FMA/?p=summary","int error","integer" -"BRENDA","BRENDA Tissue and Enzyme Source Ontology ID (BTO) corresponding to the `biospecimenType`. Attribute values are applied by ARK Portal data managers.","","","","True","","","https://bioportal.bioontology.org/ontologies/BTO","regex search ^BTO error","string" -"skinSiteStatus","Disease manifestation status of skin biospecimen.","healthy control, lesional, lesional proximal, non-lesional","","","True","","","","","string" -"salivaCollectionProcedure","Classification of saliva collection procedure. This is a conditionally dependent attribute triggered for saliva `biospecimenType`.","stimulated, unstimulated","","","True","","","","","string" -"synovialCollectionProcedure","Classification of procedure for synovial tissue collection.","arthroplasty, biopsy, synovectomy, unknown","","","True","","","","","string" -"publicationType","General classification of publication.","correction, peer-reviewed, pre-print","","","True","","","","","string" -"title","Title of the publication.","","","","True","","","","","string" -"journal","Journal in which the publication was released","","","","True","","","","","string" -"year","Year (YYYY) in which the paper was published.","","","","True","","","","regex search [1-2][0-9]{3} error","string" -"publicationDate","The publication date extracted from PubMed database","","","","True","","","","","string" -"PMID","PubMed(R) Identifier","","","","True","","","","regex search ^PMID error","string" -"DOI","Digital object identifier","","","","True","","","","","string" -"anatomicalSite","The anatomical site, i.e., location on or within the body, from which the biospecimen was collected. 
This attribute is required depending on the value selected for biospecimenType.","left 2nd MCP joint, left ankle joint, left hip joint, left knee joint, left wrist joint, other site, right 1st MTP joint, right 2nd MCP joint, right 2nd MTP joint, right 3rd MCP joint, right ankle joint, right hip joint, right knee joint, right wrist joint, unknown","","","True","","","","","string" -"vitiligoPattern","A high-level classification of vitiligo based on the distribution and patterning of lesions across the body.","mixed, non-segmental, segmental, unclassified","","","False","","","","","string" -"vitiligoPhenotype","Classification of vitiligo lesions which correlate with autoimmune activity and result in specific skin and depigmentation manifestations at the lesion site. If N/A please select 'none'. Multiple selections can be provided in a comma-delimited list.","active, confetti, inflammatory, none, trichrome","","","True","","","","list like error","string" -"psoriasisType","General type classification of psoriasis disease manifestation.","erythrodermic, guttate, inverse, plaque, pustular","","","False","","","","","string" -"ageDiagnosis","Age at which subject was diagnosed with `diagnosis`. If providing this value be sure the unit matches that used for `age`.","","","","False","","","","num error","number" -"age","Age at which subject was enrolled in study or age at corresponding visit and data collection event. 
If value unknown, enter '-1'.","","","","True","","","","num error","number" -"ageUnits","The unit of measure used for `ageEnrollment` and `ageDiagnosis`","months, years","","","True","","","","","string" -"sex","A textual description of a person's sex at birth.","female, intersex, male, unknown","","","True","","","","","string" -"height","Standing height of subject.","","","","True","","","","num error","number" -"race","A textual description of a person's race.","American Indian or Alaska Native, Asian, Black or African American, Hispanic, Mixed Race, Native Hawaiian or Other Pacific Islander, White, other, unknown","","","False","","","","","string" -"ethnicity","The ethnicity of a person.","Hispanic or Latino, Not Hispanic or Latino, unknown","","","False","","","","","string" -"heightUnits","Unit of measure of value provided for `height`.","centimeters, feet, inches, meters","","","True","","","","","string" -"weight","Weight of subject. If value unknown, enter '-1'.","","","","True","","","","num error","number" -"weightUnits","Abbreviated unit of measure of value provided for `weight`.","g, kg, lb, oz","","","True","","","","","string" -"comorbidities","Any diseases or medical condition that is simultaneously present in addition to `diagnosis`.","Hashimoto's Thyroiditis, autoimmune thyroid disease, cardiovascular disease, diabetes, inflammatory bowel disease, multiple sclerosis, other, psoriasis, psoriatic arthritis, pulmonary disease, rheumatoid arthritis, systemic lupus erythematosus","","","False","","","","list like error","string" -"diabetesType","Type of diabetes mellitus.","gestational, type 1, type 2, unknown","","","False","","","","","string" -"VIDA","Vitiligo Disease Activity Score, a six-point scale that evaluates the activity of vitiligo","","","","False","","","","","string" -"VASI","Total body Vitiligo Area Severity Index, a measure of the percentage of vitiligo involvement across the body calculated in terms of hand 
units.","","","","False","","","","","string" -"VETI","Vitiligo Extent Tensity Index measures the extent of vitiligo by a numerical score and combines analysis of extensity and severity of vitiligo to produce a constant and reproducible number.","","","","False","","","https://doi.org/10.5826/dpc.0404a18","num error","number" -"PASI","Psoriasis Area and Severity Index, is a measurement of the discoloration, thickness, scaling, and coverage of psoriasis plaques.","","","","False","","","","","string" -"CDASI","Cutaneous Dermatomyositis Disease Area and Severity Index, is a measurement used to characterize cutaneous dermatomyositis severity.","","","","False","","","","","string" -"percentCellViability","A measure of the proportion of viable cells within a cell suspension. Scale is 0-100.","","","","True","","","","inRange 50 100 error","integer" -"inputCellCount","An estimate of the number of cells expected to be sequenced in a library. Software that processes single-cell sequencing data often includes options for users to specify this value to improve processing results.","","","","True","","","","int error","integer" -"totalReads","Total number of reads sequenced from the library.","","","","True","","","","int error","integer" -"sequencingSaturation","A measure of the fraction of library complexity that was sequenced in a library. This metric quantifies the fraction of reads originating from an already-observed UMI. More specifically, this is the fraction of confidently mapped, valid cell-barcode, valid UMI reads that are non-unique.
Scale is 0-1.","","","","False","","","https://kb.10xgenomics.com/hc/en-us/articles/115005062366-What-is-sequencing-saturation","inRange 0 1 error","number" -"ImmPortAccession","Accession to corresponding information in ImmPort.","","","","False","","","","regex search ^SDY error","string" -"FACSPopulation","A description of the marker gating strategy used to derive the population cells with FACS.","","","","True","","","","","string" -"cellOntologyID","Cell Ontology CL identifier that best describes a biospecimen used to generate data, e.g., CL:0000084 for T cell.","","","","False","","","","regex search ^CL: error","string" -"cellType","The cell type name from Cell Ontology for the corresponding CL identifier.","","","","False","","","","","string" -"userDefinedCellType","User-defined label of a cell type. Contributors are provided this option to use preferred or custom cell type labels that may vary from options and standards set by Cell Ontology.","","","","False","","","","","string" -"krennInflammatory","A standardized, semi-quantitative measure of the degree of inflammatory infiltrate in synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"krennLining","A standardized, semi-quantitative measure of the degree of hyperplasia/enlargement of the synovial lining layer of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis.
If value unknown, use '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"krennStroma","A standardized, semi-quantitative measure of stromal cell density of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"krennSynovitisScore","The Krenn Synovitis Score (KSS) is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"datasetStatus","A categorical label indicating the status of an ARK Portal dataset. 
This is applied to improve downstream management of datasets as well as various ETL workflows.","deprecated, released, test, under peer review, unreleased","","","True","","","","","string" -"sampleProcessingBatch","A label indicating batching of sample processing or preparation that occurs prior to data collection.","","","","False","","","","","string" -"dataCollectionBatch","A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.","","","","False","","","","","string" -"metadataStandards","Metadata standards used to generate the metadata","ARK data model, user-defined","","","False","","","","","string" -"skinSunExposure","For skin-based biospecimen, this attribute indicates whether the sample was collected from an anatomical site that does or does not receive routine sun exposure.","not sun exposed, sun exposed","","","False","","","","","string" -"sequencingSpikeIn","Pre-made sequencing libraries may be added to your sequencing run to improve sequencing results. If such a 'spike-in' library was used, this attribute specifies the name of the library and the percentage of the spike-in library used in the sequencing run.","","","","False","","","","","string" -"librarySpikeIn","Pre-made collections of nucleic acid fragments can be added to samples during the library construction process to improve downstream quantification and statistical analyses. If such a 'spike-in' was used, this attribute specifies the name and manufacturer of the spike-in.","","","","False","","","","","string" -"readLength","The number of base pairs (bp) sequenced for reads in a fastq file.","","","","False","","","","int error","integer" -"AMP RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","programPhase","","False","","","","","" -"plateID","An identifier assigned to a multi-well plate. 
Certain data types and assays profile samples using multi-well plates. Knowing which samples were profiled on each plate is important for establishing sample provenance, finding the right data files for a specific set of samples, as well as downstream exploratory data analysis and QC work particularly regarding identification and correction of batch effects.","","","","False","","","","","string" -"PMCID","Pubmed Central Identifier, formatted as a compact URI string that will automatically resolve into a URL based on the Synapse bioregistry; e.g., pmc:PMCxxxxxxxx","","","","False","","","","regex search ^pmc:PMC[0-9]{8} error","string" -"processedDataType","A label used for file annotations to provide a brief description of the processed data file.","barcode counts, differential expression results, epigenomic peaks, gene counts","","","False","","","","list like error","string" -"eventCount","The total number of events detected and captured in the corresponding FCS file. In cytometry, an event corresponds to particles detected and measured by the instrument. This number corresponds to the number of rows in the FCS file and is expected to be a whole integer number.","","","","False","","","","int error","integer" -"notes","Please use this attribute to captured pertinent details not otherwise captured. This is an unstructured text entry, please be concise.","","","","False","","","","","string" -"species","The genus species of sample or subject origin.","Homo sapiens","","","True","","","","","string" -"treatment","A short descriptive label indicating what experimental treatments have been applied to a biospecimen before data capture. 
Please include the word 'control' to indicate specific treatments that are intended to serve as a control group.","","","","True","","","","","string" -"treatmentTimepoint","Where applicable, specify the timepoint relative to treatment to distinguish different sets of samples that have undergone the same treatment but for different lengths of time. REQUIRED: please specify the unit of time, e.g., secs, mins, hrs, days, etc.","","","","False","","","","","string" -"slideID","A distinct label or name, unique within an experiment, assigned to an imaging slides.","","","","True","","","","","string" -"altSampleID","An alternate identifier for a sample. With some assays there can be default or alternate sample identifiers entered into the data collection software. If you will be uploading data collected with an alternate identifier please provide that here. In some cases, the ARK BDM team will use this field to capture original but out-dated identifiers for samples.","","","","False","","","","","string" -"sampleCollectionBatch","A label indicating batching of sample collection or experiment execution that occurs prior to data collection.","","","","False","","","","","string" -"associatedAccession","This is a File and Dataset annotation attribute indicating additional accessions (i.e., unique identifiers) associated with the data when the data has also been submitted to or can be found in other repositories such as GEO, SRA, dbGaP, etc.","","","","False","","","","list like error","string" +"Attribute","Description","Valid Values","DependsOn","Required","Parent","Source","Validation Rules","columnType","Format","Minimum","Maximum","Pattern" +"Dataset Annotation Template","A template outlining dataset metadata to use as annotations for a synapse dataset entity.","","Component, program, project, datasetType, biospecimenType, biospecimenSubtype, diagnosis, acknowledgmentStatement, datasetDescription, ARKRelease, dataType, dataSubtype, assay, publicationSynID, 
associatedDataset, associatedCodeURL, associatedAccession, ImmPortAccession, datasetStatus, species","","","","","","","","","" +"Publication Metadata Template","A template outlining metadata to use as annotations for Publication ‘file’ entities.","","Component, program, project, associatedDataset, PMID, PMCID, DOI, journal, year, title, publicationType, publicationDate","","","","","","","","","" +"scRNASeq Assay Metadata Template","A template outlining metadata to be collected for each library in a scRNA-seq dataset.","","Component, assay, specimenModality, libraryPrepMethod, nucleicAcidSource, totalReads, percentCellViability, platform, sequencingSaturation, sampleProcessingBatch, dataCollectionBatch, inputCellCount, alignmentReference, softwareAndVersion","","","","","","","","","" +"Bulk RNA-seq Assay Metadata Template","A template outlining metadata to be collected for each library in a bulk RNA-seq dataset.","","Component, assay, specimenModality, libraryPrepMethod, nucleicAcidSource, totalReads, platform, sampleProcessingBatch, dataCollectionBatch, alignmentReference, softwareAndVersion","","","","","","","","","" +"BDM Metadata File Annotations","A template outlining metadata to be collected for annotating metadata files to be compiled by ARK BDM.","","Component, fileFormat, dataType, resourceType, program, project, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","","","" +"snRNASeq Assay Metadata Template","A template outlining metadata to be collected for each library in a snRNA-seq dataset.","","Component, assay, specimenModality, libraryPrepMethod, nucleicAcidSource, totalReads, percentCellViability, platform, sequencingSaturation, sampleProcessingBatch, dataCollectionBatch, inputCellCount, alignmentReference, softwareAndVersion","","","","","","","","","" +"Bulk ATAC-seq Assay Metadata Template","A template outlining metadata to be collected for each library in a bulk ATAC-seq dataset.","","Component, assay, 
specimenModality, libraryPrepMethod, nucleicAcidSource, totalReads, platform, sampleProcessingBatch, dataCollectionBatch, alignmentReference, softwareAndVersion","","","","","","","","","" +"snATAC-seq Assay Metadata Template","A template outlining metadata to be collected for each library in a snATAC-seq dataset.","","Component, assay, specimenModality, libraryPrepMethod, nucleicAcidSource, totalReads, percentCellViability, platform, sequencingSaturation, sampleProcessingBatch, dataCollectionBatch, inputCellCount, alignmentReference, softwareAndVersion","","","","","","","","","" +"CyTOF Assay Metadata Template","A template outlining assay-related metadata for a cytometry time-of-flight (CyTOF) dataset. Each row corresponds to a biospecimen profiled in the experiment.","","Component, assay, biospecimenID, sampleProcessingBatch, dataCollectionBatch, platform, softwareAndVersion, targetPanel, targetPanelSynID, targetPanelSize","","","","","","","","","" +"metadata","Any file that contains curated data describing an experiment and experimental-derived data, including metadata about study subjects, biospecimens, protocols, assay reagents, marker panels, and ID mappings. Is a valid value of Resource Type that triggers conditional dependencies for additional attributes.","","metadataType, metadataStandards","","","","","","","","","" +"10x GEM-X Flex Gene Expression Human","A scRNA-seq method that analyzes gene expression using pre-designed probes. 
Is a valid value of `libraryPrepMethod` and when selected triggers conditional dependencies for additional metadata collection.","","10xProbeSetReference","","","","","","","","","" +"multispecimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","libraryID","","","","","","","","","" +"single specimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","biospecimenID","","","","","","","","","" +"Component","A high-level attribute for grouping attributes into templates.","","","True","","","","string","","","","" +"fileFormat","Standard file format name or file extension","bai, bam, bed, bim, csv, czi, docx, dose, erate, fam, fastq, fcs, geojson, h5, h5ad, info, mcd, mtx, parquet, pdf, py, rds, rec, svs, tbi, tgz, tsv, txt, vcf, xls, xlsx, zip","","True","","","","string","","","","" +"program","Name of the funding program that supported the generation of data and associated files","AMP AIM, AMP RA/SLE, Community Contribution","","True","","","","string","","","","" +"project","A sub-level attribute of `program` specifying a research initiative working to investigate particular hypotheses.","AIM for RA, ELLIPSS, LOCKIT, RA, SLE, STAMP, UMass V-CoRT","","True","","","list like error","string_list","","","","" +"programPhase","A label noting which AMP RA/SLE program phase generated the data.","I, II","","True","","","list like error","string_list","","","","" +"resourceType","High-level classification of the file content","code, experimental data, figure, metadata","","True","","","","string","","","","" +"dataType","High-level classification of the type of data contained in the file, loosely related to the experimental method or biological entity that is being profiled. Select all that apply using a comma-delimited list, though in most cases only a single label is expected. 
For multimodal datasets with concomitant profiling of biospecimen include 'multimodal'.","cytometry, epigenomics, genomics, histology, immune repertoire profiling, immunostaining, lipidomics, metabolomics, microbiome, multimodal, proteomics, transcriptomics","","True","","","list like error","string_list","","","","" +"dataSubtype","General classification to differentiate between omics profiling modalities. If N/A please select 'none'. Multiple selections can be provided in a comma-delimited list, however this is largely only expected in the context of Datasets and files that contain integrated experimental data spanning multiple types.","bulk, none, pseudobulk, single-cell, single-nucleus, spatial","","True","","","list like error","string_list","","","","" +"dataLevel","Level of data processing applied to file. Levels refer to pre-defined standards of processing for the given assay.","1, 2, 3, 4, 5","","True","","","","string","","","","" +"assay","The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.","ASAPSeq, CE-MS, CITESeq, CosMX, CyTOF, GenePS SeqFISH, H&E, LC-MS/MS, NULISA, Olink Explore HT, Olink Flex, Olink Focus, Olink Reveal, Olink Target 48, Olink Target 96, RNASeq, SNP array, SomaScan, VDJSeq, Visium, WES, WGS, Xenium, feature barcode sequencing, flow cytometry, imaging mass cytometry, imaging mass spectrometry, kiloplex, multiplexed ELISA, scRNASeq, scVDJSeq, serial IHC, snATACSeq, snRNASeq","","True","","","list like error","string_list","","","","" +"10xProbeSetReference","Name of probe set used in 10x Chromium Flex, Xenium, or Visium data. 
If custom modified probe set was used the probe reference should be included as metadata accompanying the experimental data files.","Flex Human Transcriptome Probe Set v1.0.1, Flex Human Transcriptome Probe Set v1.1.0, Visium Human Transcriptome v1, Visium Human Transcriptome v2, custom probe set","","True","","","","string","","","","" +"platform","The specific version (manufacturer, model, etc.) of a technology that is used to carry out a laboratory or computational experiment. Specify where applicable for experimental data files, else enter 'none'. In most cases only a single label is expected, however multiple selections can be provided in comma-delimited list where applicable e.g., for 10x Genomics fastq files please specify both the 10x instrument and the sequencing platform.","BD FACSAria Fusion cell sorter, BD FACSAria III, BD FACSCanto, BD FACSCanto II, BD FACSDiscover A8, BD FACSDiscover S8, BD FACSLyric Clinical, BD FACSMelody, BD FACSymphony S6, BD LSRFortessa, Chromium Controller, Chromium GEM-X Single Cell 3' Chip v4, Chromium Next GEM Chip G, Chromium Next GEM Chip H, Chromium Next GEM Chip K, Chromium Next GEM Chip M, Chromium Next GEM Chip Q, Chromium X, Chromium Xo, Chromium iX, CyTOF XT, Cytek Aurora, Cytek Aurora Evo, Fluidigm BioMark, GEM-X Flex Gene Expression Chip, GEM-X OCM 5' Chip, Helios Mass Cytometer, Hyperion, Illumina HiSeq 2500, Illumina HiSeq X Ten, Illumina NextSeq 500, Illumina NovaSeq 6000, Illumina NovaSeq X, Not Applicable, Olink Signature Q100, Sony MA900, Thermo Fisher Attune CytPix, Thermo Fisher Attune NxT, Thermo Fisher Attune Xenith, Visium CytAssist, Xenium, none, unknown","","True","","","list like error","string_list","","","","" +"specimenModality","Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens","multispecimen, single specimen, unknown","","True","","","","string","","","","" +"individualID","Unique identifier assigned to each study 
participant. For multi-specimen data files provide all IDs in a comma-separated list.","","","True","","","#ClinicalMetadataTemplate unique error^^#BiospecimenMetadataTemplate str^^list like error","string_list","","","","" +"biospecimenID","A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.","","","True","","","#BiospecimenMetadataTemplate unique error^^list like error","string_list","","","","" +"parentBiospecimenID","The biospecimenID associated with the originating biospecimen for derived or child biospecimens.","","","False","","","","string","","","","" +"biospecimenType","A label indicating the biological material collected for experimentation and data collection. Where applicable, provide all types in a comma-separated list.","PBMCs, cell line, fibroblast-like synoviocyte, kidney biopsy, none, plasma, primary cell culture, saliva, salivary gland, serum, skin biopsy, skin swab, stool, suction blister cells, suction blister fluid, synovial fluid, synovial tissue, total leukocytes, urine, uvea, whole blood","","True","","","#BiospecimenMetadataTemplate str^^#InVitroBiospecimenMetadataTemplate^^list like error","string_list","","","","" +"primaryCellSource","A label indicating the biological source material from which a primary cell culture was derived.","PBMCs, kidney, pannus-derived dermis, pannus-derived epidermis, salivary gland, synovial tissue, total leukocytes, urine, uvea, whole blood","","True","","","","string","","","","" +"metadataType","A label further classifying the content of metadata resource.","assay, biospecimen, cell coordinates, data dictionary, file manifest, medication, other, phenotype, protocol, single-cell metadata, target panel, template, tissue microarray map, tissue multiarray map, user manual","","True","","","","string","","","","" +"codingLanguage","The coding, aka programming, language(s) contained in the file marked with 
`resourceType = code`. Select all that apply.","C, C#, C++, Fortan, Java, Julia, Matlab, Python, R, Ruby, SAS","","True","","","","string","","","","" +"visitID","Ordinal ID distinguishing different patient visits.","","","True","","","","string","","","","" +"softwareAndVersion","Relevant software and version used to generate the data file.","BD FACSDiva 8.0.1, Cell Ranger 9.0.1, Cell Ranger ATAC v1.1.0, Cell Ranger v3.0.0, Cell Ranger v3.0.1, Cell Ranger v3.0.2, Cell Ranger v3.1.0, Cell Ranger v4.0.0, Cell Ranger v5.0.0, Cell Ranger v5.0.1, Cell Ranger v6.0.0, Cell Ranger v6.0.1, Cell Ranger v6.0.2, Cell Ranger v6.1.0, Cell Ranger v6.1.1, Cell Ranger v6.1.2, Cell Ranger v7.0.0, Cell Ranger v7.0.1, Cell Ranger v7.1.0, Cell Ranger v7.2.0, Cell Ranger v8.0.0, Cell Ranger v8.0.1, Cell Ranger v9.0.0, Space Ranger 3.0.0, Space Ranger 3.0.1, Space Ranger 3.1.0, Space Ranger 3.1.1, Space Ranger 3.1.2, Space Ranger 3.1.3, demuxlet","","False","","","","string","","","","" +"targetPanel","A unique or established human-readable name assigned to the panel of targets profiled in the experiment. For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.","","","True","","","","string","","","","" +"targetPanelSize","The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).","","","False","","","int error","integer","","","","" +"nucleicAcidSource","The source of the nucleic acid used as input for sequencing library fragments. 
Select all that apply, though in most cases only a single label is expected.","BCR mRNA, CRISPR protospacer feature barcode, TCR mRNA, Tn5-accessible gDNA, antigen capture barcode, gDNA, globin-depleted RNA, intracellular protein feature barcode, multiplexing oligo, poly(A) RNA, rRNA-depleted RNA, surface protein feature barcode","","True","","","list like error","string_list","","","","" +"biospecimenSubtype","Biospecimen status before sample is processed into a scRNA-seq library. Several scRNA-seq technologies support a variety of sample processing methods which can introduce sources of technical variation.","FFPE tissue, PFA-fixed tissue, cell or tissue lysate, cell suspension, flow-sorted cells, fresh tissue, frozen tissue, nuclei suspension, supernatant","","False","","","","string","","","","" +"cellRangerOutput","10x Genomics Cell Ranger software outputs several different count results and formats, some with different processing applied. This label distinguishes between these types and is particularly helpful when multiple files are uploaded with the same name, e.g., barcodes.tsv.gz","Not Applicable, filtered MEX, filtered_feature_bc_matrix, filtered_peak_bc_matrix, raw MEX, raw_feature_bc_matrix, raw_peak_bc_matrix","","True","","","","string","","","","" +"alignmentReference","The genomic/transcriptomic reference used for performing read alignment against.","10x Cell Ranger Human GRCh38 2020-A, 10x Cell Ranger Human GRCh38 2024-A, GRCh38, modified GRCh38, unknown, vdj_GRCh38_alts_ensembl-4.0.0","","True","","","","string","","","","" +"libraryPrepMethod","Sequencing library preparation method or kit used to create the library. 
If no commercially available kit was used, please select 'in-house library prep'.","10x Chromium Fixed RNA Human Transcriptome, 10x Chromium GEM-X Single Cell 3' v4, 10x Chromium GEM-X Single Cell 5' v3, 10x Chromium Next GEM Single Cell 3', 10x Chromium Next GEM Single Cell 3' 3.1, 10x Chromium Next GEM Single Cell 5' v1.1, 10x Chromium Next GEM Single Cell 5' v2, 10x Chromium Next GEM Single Cell ATAC v2, 10x Chromium Single Cell Human BCR, 10x Chromium Single Cell Human TCR, 10x GEM-X Flex Gene Expression Human, 10x GEM-X Universal 5' Gene Expression v3, CEL-Seq2, Chromium Next GEM Single Cell ATAC v1.1, Fluidigm C1 HT, NEBNext Human Immune Sequencing Kit, NEBNext Ultra II Directional RNA Library, Nextera XT, Nextera XT DNA, QIAseq miRNA Library, SMART-Seq Human BCR with UMI, SMART-Seq Human TCR with UMI, SMART-Seq v4 Ultra Low Input RNA, SMARTer Stranded Total RNA v2, Takara Human BCR profiling for Illumina, Takara Human TCR profiling for Illumina, Takara Human TCRv2 profiling for Illumina, Takara Human scTCR profiling for Illumina, TruSeq Stranded mRNA, custom DASH-treatment, in-house library prep","","True","","","","string","","","","" +"datasetType","High-level classification of dataset entity distinguishing between datasets compiled for a specific publication or as a general data resource.","experimental, publication","","True","","","","string","","","","" +"acknowledgmentStatement","A Dataset-specific attribute specifying the path to the wiki subpage within the ARK Portal - backend project that contains the acknowledgement statement that must be included in publications using data from the given dataset as a stipulation of the conditions of use.","syn26710600/wiki/619685","","True","","","","string","","","","" +"ARKRelease","A Dataset-specific attribute specifying the ARK Portal release in which this dataset was first made available to the public.","1.0, 2.0, 2024.06.R1, 2024.07.R1, 2024.08.R1, 2024.09.R1, 2024.10.R1, 2024.12.R1, 2025.01.R1, 2025.02.R1, 
2025.03.R1, 2025.04.R1, 2025.05.R1, 2025.06.R1, 2025.07.R1, 2025.08.R1, 2025.09.R1, 2025.10.R1, 2025.11.R1, 2025.12.R1","","True","","","","string","","","","" +"RObjectClass","Rds files store R objects, one per file. This label details the class of the R object saved to the Rds file or other similar file types.","ROCR prediction.object, Seurat object, SummarizedExperiment, Symphony reference, data.frame, list, matrix, sparse matrix, vector","","False","","","","string","","","","" +"diagnosis","A high-level classifier indicating the disease status of an individual.","At-Risk RA, Not Applicable, OA, RA, SLE, Sjogren's disease, control, cutaneous lupus erythematosus, dermatomyositis, discoid lupus erythematosus, lupus nephritis, psoriasis, psoriatic arthritis, scleroderma, unknown, vitiligo","","True","","","list like error","string_list","","","","" +"libraryID","A library label or name, unique within an experiment, used to distinguish sequencing libraries.","","","True","","","","string","","","","" +"associatedCodeURL","A URL to the repository where associated code is available.","","","False","","","","string","uri","","","" +"FMAID","Functional Model of Anatomy ontology ID corresponding to the anatomical site and origin of the biospecimen. This attribute is assigned by ARK Portal data managers.","","","True","","https://bioportal.bioontology.org/ontologies/FMA/?p=summary","int error","integer","","","","" +"skinSiteStatus","Disease manifestation status of skin biospecimen.","healthy control, lesional, lesional proximal, non-lesional","","True","","","","string","","","","" +"salivaCollectionProcedure","Classification of saliva collection procedure. 
This is a conditionally dependent attribute triggered for saliva `biospecimenType`.","stimulated, unstimulated","","True","","","","string","","","","" +"synovialCollectionProcedure","Classification of procedure for synovial tissue collection.","arthroplasty, biopsy, synovectomy, unknown","","True","","","","string","","","","" +"publicationType","General classification of publication.","correction, peer-reviewed, pre-print","","True","","","","string","","","","" +"title","Title of the publication.","","","True","","","","string","","","","" +"journal","Journal in which the publication was released","","","True","","","","string","","","","" +"publicationDate","The publication date extracted from PubMed database","","","True","","","","string","","","","" +"DOI","Digital object identifier","","","True","","","","string","uri-reference","","","" +"anatomicalSite","The anatomical site, i.e., location on or within the body, from which the biospecimen was collected. This attribute is required depending on the value selected for biospecimenType.","left 2nd MCP joint, left ankle joint, left hip joint, left knee joint, left wrist joint, other site, right 1st MTP joint, right 2nd MCP joint, right 2nd MTP joint, right 3rd MCP joint, right ankle joint, right hip joint, right knee joint, right wrist joint, unknown","","True","","","","string","","","","" +"vitiligoPattern","A high-level classification of vitiligo based on the distribution and patterning of lesions across the body.","mixed, non-segmental, segmental, unclassified","","False","","","","string","","","","" +"vitiligoPhenotype","Classification of vitiligo lesions which correlate with autoimmune activity and result in specific skin and depigmentation manifestations at the lesion site. If N/A please select 'none'. 
Multiple selections can be provided in a comma-delimited list.","active, confetti, inflammatory, none, trichrome","","True","","","list like error","string_list","","","","" +"psoriasisType","General type classification of psoriasis disease manifestation.","erythrodermic, guttate, inverse, plaque, pustular","","False","","","","string","","","","" +"ageDiagnosis","Age at which subject was diagnosed with `diagnosis`. If providing this value be sure the unit matches that used for `age`.","","","False","","","num error","number","","","","" +"age","Age at which subject was enrolled in study or age at corresponding visit and data collection event. If value unknown, enter '-1'.","","","True","","","num error","number","","","","" +"ageUnits","The unit of measure used for `ageEnrollment` and `ageDiagnosis`","months, years","","True","","","","string","","","","" +"sex","A textual description of a person's sex at birth.","female, intersex, male, unknown","","True","","","","string","","","","" +"height","Standing height of subject.","","","True","","","num error","number","","","","" +"race","A textual description of a person's race.","American Indian or Alaska Native, Asian, Black or African American, Hispanic, Mixed Race, Native Hawaiian or Other Pacific Islander, White, other, unknown","","False","","","","string","","","","" +"ethnicity","The ethnicity of a person.","Hispanic or Latino, Not Hispanic or Latino, unknown","","False","","","","string","","","","" +"heightUnits","Unit of measure of value provided for `height`.","centimeters, feet, inches, meters","","True","","","","string","","","","" +"weight","Weight of subject. 
If value unknown, enter '-1'.","","","True","","","num error","number","","","","" +"weightUnits","Abbreviated unit of measure of value provided for `weight`.","g, kg, lb, oz","","True","","","","string","","","","" +"comorbidities","Any diseases or medical condition that is simultaneously present in addition to `diagnosis`.","Hashimoto's Thyroiditis, autoimmune thyroid disease, cardiovascular disease, diabetes, inflammatory bowel disease, multiple sclerosis, other, psoriasis, psoriatic arthritis, pulmonary disease, rheumatoid arthritis, systemic lupus erythematosus","","False","","","list like error","string_list","","","","" +"diabetesType","Type of diabetes mellitus.","gestational, type 1, type 2, unknown","","False","","","","string","","","","" +"VIDA","Vitiligo Disease Activity Score, a six-point scale that evaluates the activity of vitiligo","","","False","","","","string","","","","" +"VASI","Total body Vitiligo Area Severity Index, a measure of the percentage of vitiligo involvement across the body calculated in terms of hand units.","","","False","","","","string","","","","" +"VETI","Vitiligo Extent Tensity Index measures the extent of vitiligo by a numerical score and combines analysis of extensity and severity of vitiligo to produce a constant and reproducible number.","","","False","","https://doi.org/10.5826/dpc.0404a18","num error","number","","","","" +"PASI","Psoriasis Area and Severity Index, is a measurement of the discoloration, thickness, scaling, and coverage of psoriasis plaques.","","","False","","","","string","","","","" +"CDASI","Cutaneous Dermatomyositis Disease Area and Severity Index, is a measurement used to characterize cutaneous dermatomyositis severity.","","","False","","","","string","","","","" +"inputCellCount","An estimate of the number of cells expected to be sequenced in a library. 
Software that processes single-cell sequencing data often includes options for users to specify this value to improve processing results.","","","True","","","int error","integer","","","","" +"totalReads","Total number of reads sequenced from the library.","","","True","","","int error","integer","","","","" +"FACSPopulation","A description of the marker gating strategy used to derive the population cells with FACS.","","","True","","","","string","","","","" +"cellType","The cell type name from Cell Ontology for the corresponding CL identifier.","","","False","","","","string","","","","" +"userDefinedCellType","User-defined label of a cell type. Contributors are provided this option to use preferred or custom cell type labels that may vary from options and standards set by Cell Ontology.","","","False","","","","string","","","","" +"krennInflammatory","A standardized, semi-quantitative measure of the degree of inflammatory infiltrate in synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"krennLining","A standardized, semi-quantitative measure of the degree of hyperplasia/enlargement of the synovial lining layer of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"krennStroma","A standardized, semi-quantitative measure of stromal cell density of synovial specimen. 
This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"krennSynovitisScore","The Krenn Synovitis Score (KSS) is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"datasetStatus","A categorical label indicating the status of an ARK Portal dataset. This is applied to improve downstream management of datasets as well as various ETL workflows.","deprecated, released, test, under peer review, unreleased","","True","","","","string","","","","" +"sampleProcessingBatch","A label indicating batching of sample processing or preparation that occurs prior to data collection.","","","False","","","","string","","","","" +"dataCollectionBatch","A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.","","","False","","","","string","","","","" +"metadataStandards","Metadata standards used to generate the metadata","ARK data model, user-defined","","False","","","","string","","","","" +"skinSunExposure","For skin-based biospecimen, this attribute indicates whether the sample was collected from an anatomical site that does or does not receive routine sun exposure.","not sun exposed, sun exposed","","False","","","","string","","","","" +"sequencingSpikeIn","Pre-made sequencing libraries may be added to your sequencing run to improve sequencing results. 
If such a 'spike-in' library was used, this attribute specifies the name of the library and the percentage of the spike-in library used in the sequencing run.","","","False","","","","string","","","","" +"librarySpikeIn","Pre-made collections of nucleic acid fragments can be added to samples during the library construction process to improve downstream quantification and statistical analyses. If such a 'spike-in' was used, this attribute specifies the name and manufacturer of the spike-in.","","","False","","","","string","","","","" +"readLength","The number of base pairs (bp) sequenced for reads in a fastq file.","","","False","","","int error","integer","","","","" +"AMP RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","programPhase","False","","","","","","","","" +"plateID","An identifier assigned to a multi-well plate. Certain data types and assays profile samples using multi-well plates. Knowing which samples were profiled on each plate is important for establishing sample provenance, finding the right data files for a specific set of samples, as well as downstream exploratory data analysis and QC work particularly regarding identification and correction of batch effects.","","","False","","","","string","","","","" +"processedDataType","A label used for file annotations to provide a brief description of the processed data file.","barcode counts, differential expression results, epigenomic peaks, gene counts","","False","","","list like error","string_list","","","","" +"eventCount","The total number of events detected and captured in the corresponding FCS file. In cytometry, an event corresponds to particles detected and measured by the instrument. 
This number corresponds to the number of rows in the FCS file and is expected to be a whole integer number.","","","False","","","int error","integer","","","","" +"notes","Please use this attribute to capture pertinent details not otherwise captured. This is an unstructured text entry, please be concise.","","","False","","","","string","","","","" +"species","The genus species of sample or subject origin.","Homo sapiens","","True","","","","string","","","","" +"treatment","A short descriptive label indicating what experimental treatments have been applied to a biospecimen before data capture. Please include the word 'control' to indicate specific treatments that are intended to serve as a control group.","","","True","","","","string","","","","" +"treatmentTimepoint","Where applicable, specify the timepoint relative to treatment to distinguish different sets of samples that have undergone the same treatment but for different lengths of time. REQUIRED: please specify the unit of time, e.g., secs, mins, hrs, days, etc.","","","False","","","","string","","","","" +"slideID","A distinct label or name, unique within an experiment, assigned to an imaging slide.","","","True","","","","string","","","","" +"altSampleID","An alternate identifier for a sample. With some assays there can be default or alternate sample identifiers entered into the data collection software. If you will be uploading data collected with an alternate identifier please provide that here. 
In some cases, the ARK BDM team will use this field to capture original but out-dated identifiers for samples.","","","False","","","","string","","","","" +"sampleCollectionBatch","A label indicating batching of sample collection or experiment execution that occurs prior to data collection.","","","False","","","","string","","","","" +"associatedAccession","This is a File and Dataset annotation attribute indicating additional accessions (i.e., unique identifiers) associated with the data when the data has also been submitted to or can be found in other repositories such as GEO, SRA, dbGaP, etc.","","","False","","","list like error","string_list","","","","" +"percentCellViability","A measure of the proportion of viable cells within a cell suspension. Scale is 0-100.","","","True","","","inRange 50 100 error","integer","","50","100","" +"sequencingSaturation","A measure of the fraction of library complexity that was sequenced in a library. This metric quantifies the fraction of reads originating from an already-observed UMI. More specifically, this is the fraction of confidently mapped, valid cell-barcode, valid UMI reads that are non-unique. Scale is 0-1.","","","False","","https://kb.10xgenomics.com/hc/en-us/articles/115005062366-What-is-sequencing-saturation","inRange 0 1 error","number","","0","1","" +"publicationSynID","The synID of the corresponding Synapse entity that stores metadata about the publication. This is used to differentiate publication-specific files, often consisting of level 4 processed data and expanded subject metadata, in a publication dataset that also includes raw or minimally processed files from experimental datasets. This provides an easy way to distinguish and select for the publication-specific data from which the research findings were derived. 
When this attribute is used to annotate a Dataset it serves as a way to directly link the Dataset entity with the publication metadata stored in Synapse.","","","False","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"associatedDataset","The synID of a Dataset entity. This serves to link other Synapse entities to Dataset entities. When used to annotate a publication Dataset this attribute should include the synID for an experimental Datasets from which the publication data was derived. Multiple synID can be specified using a comma-delimited list.","","","False","","","list like::regex search ^syn[0-9]{8} error","string_list","","","","^syn[0-9]{8}" +"custom10xProbeSetSynID","If custom modified probe sets were used for collecting 10x Chromium Flex scRNA-seq data, then the probe reference files should be grouped as a zip or tar archive and uploaded as metadata. This attribute links the experimental data back to the probe set archive file via a synapse ID.","","","True","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"targetPanelSynID","In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. This attribute links experimental data files to the target panel metadata via the synapse ID of that file.","","","True","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"datasetDescription","A Dataset-specific attribute specifying the synID of the folder that contains a wiki write-up of the dataset description. This wiki content will be surfaced on the ARK Portal frontend site.","","","True","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"DOID","Disease ontology identifier associated with `diagnosis`. 
Attribute values are applied by ARK Portal data managers.","","","True","","","regex search ^DOID error","string","","","","^DOID" +"BRENDA","BRENDA Tissue and Enzyme Source Ontology ID (BTO) corresponding to the `biospecimenType`. Attribute values are applied by ARK Portal data managers.","","","True","","https://bioportal.bioontology.org/ontologies/BTO","regex search ^BTO error","string","","","","^BTO" +"year","Year (YYYY) in which the paper was published.","","","True","","","regex search [1-2][0-9]{3} error","string","","","","[1-2][0-9]{3}" +"PMID","PubMed(R) Identifier","","","True","","","regex search ^PMID error","string","","","","^PMID" +"ImmPortAccession","Accession to corresponding information in ImmPort.","","","False","","","regex search ^SDY error","string","","","","^SDY" +"cellOntologyID","Cell Ontology CL identifier that best describes a biospecimen used to generate data, e.g., CL:0000084 for T cell.","","","False","","","regex search ^CL: error","string","","","","^CL:" +"PMCID","Pubmed Central Identifier, formatted as a compact URI string that will automatically resolve into a URL based on the Synapse bioregistry; e.g., pmc:PMCxxxxxxxx","","","False","","","regex search ^pmc:PMC[0-9]{8} error","string","","","","^pmc:PMC[0-9]{8}" diff --git a/model_contexts/olink/ark.olink_context.csv b/model_contexts/olink/ark.olink_context.csv index 05655029..75f6632a 100644 --- a/model_contexts/olink/ark.olink_context.csv +++ b/model_contexts/olink/ark.olink_context.csv @@ -1,9 +1,9 @@ -"Attribute","Description","Valid Values","DependsOn","Properties","Required","Parent","DependsOn Component","Source","Validation Rules","columnType" -"Olink Assay Metadata Template","A template outlining assay metadata to be collected for each plate in an Olink dataset.","","Component, assay, plateID, platform, targetPanel, targetPanelSynID, targetPanelSize","","","","","","","" -"Olink File Annotation Template","A template outlining metadata to be provided by contributors 
that is applied as a preliminary set of annotations to Olink dataset files.","","Component, fileFormat, resourceType, plateID, specimenModality","","","","","","","" -"BDM Olink File Annotations","A template outlining the complete set of metadata to be applied as annotations to Olink dataset files by ARK Portal BDM.","","Component, fileFormat, resourceType, plateID, platform, targetPanel, targetPanelSynID, targetPanelSize, diagnosis, specimenModality, dataType, dataSubtype, visitID, program, project, biospecimenType, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","" -"assay","The technology used to generate the data in this file.","Olink Explore HT, Olink Reveal, Olink Flex, Olink Focus, Olink Target 48, Olink Target 96","","","TRUE","","","","","string" -"platform","The specific instrument (manufacturer, model, etc.) that was used to carry out a laboratory or computational experiment.","Olink Signature Q100, Fluidigm BioMark, Illumina NextSeq 500, Illumina NovaSeq 6000, unknown","","","FALSE","","","","list like error","string" -"plateID","An identifier assigned to a multi-well plate. Certain data types and assays profile samples using multi-well plates. Knowing which samples were profiled on each plate is important for establishing sample provenance, finding the right data files for a specific set of samples, as well as downstream exploratory data analysis and QC work particularly regarding identification and correction of batch effects.","","","","TRUE","","","","^^#OlinkAssayMetadataTemplate unique error^^list like error","string" -"AMP RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID, programPhase","","FALSE","","","","","" -"AMP AIM","Accelerating Medicines Partnership Autoimmune and Immune-Mediated Diseases. 
Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","" +"Attribute","Description","Valid Values","DependsOn","Required","Parent","Source","Validation Rules","columnType","Format","Minimum","Maximum","Pattern" +"Olink Assay Metadata Template","A template outlining assay metadata to be collected for each plate in an Olink dataset.","","Component, assay, plateID, platform, targetPanel, targetPanelSynID, targetPanelSize","","","","","","","","","" +"Olink File Annotation Template","A template outlining metadata to be provided by contributors that is applied as a preliminary set of annotations to Olink dataset files.","","Component, fileFormat, resourceType, plateID, specimenModality","","","","","","","","","" +"BDM Olink File Annotations","A template outlining the complete set of metadata to be applied as annotations to Olink dataset files by ARK Portal BDM.","","Component, fileFormat, resourceType, plateID, platform, targetPanel, targetPanelSynID, targetPanelSize, diagnosis, specimenModality, dataType, dataSubtype, visitID, program, project, biospecimenType, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","","","" +"assay","The technology used to generate the data in this file.","Olink Explore HT, Olink Reveal, Olink Flex, Olink Focus, Olink Target 48, Olink Target 96","","TRUE","","","","string","","","","" +"platform","The specific instrument (manufacturer, model, etc.) that was used to carry out a laboratory or computational experiment.","Olink Signature Q100, Fluidigm BioMark, Illumina NextSeq 500, Illumina NovaSeq 6000, unknown","","FALSE","","","list like error","string_list","","","","" +"plateID","An identifier assigned to a multi-well plate. Certain data types and assays profile samples using multi-well plates. 
Knowing which samples were profiled on each plate is important for establishing sample provenance, finding the right data files for a specific set of samples, as well as downstream exploratory data analysis and QC work particularly regarding identification and correction of batch effects.","","","TRUE","","","^^#OlinkAssayMetadataTemplate unique error^^list like error","string_list","","","","" +"AMP RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID, programPhase","FALSE","","","","","","","","" +"AMP AIM","Accelerating Medicines Partnership Autoimmune and Immune-Mediated Diseases. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","","","" diff --git a/model_contexts/olink/ark.olink_model.csv b/model_contexts/olink/ark.olink_model.csv index f30a6313..02db56c4 100644 --- a/model_contexts/olink/ark.olink_model.csv +++ b/model_contexts/olink/ark.olink_model.csv @@ -1,114 +1,114 @@ -"Attribute","Description","Valid Values","DependsOn","Properties","Required","Parent","DependsOn Component","Source","Validation Rules","columnType" -"Olink Assay Metadata Template","A template outlining assay metadata to be collected for each plate in an Olink dataset.","","Component, assay, plateID, platform, targetPanel, targetPanelSynID, targetPanelSize","","","","","","","" -"Olink File Annotation Template","A template outlining metadata to be provided by contributors that is applied as a preliminary set of annotations to Olink dataset files.","","Component, fileFormat, resourceType, plateID, specimenModality","","","","","","","" -"BDM Olink File Annotations","A template outlining the complete set of metadata to be applied as annotations to Olink dataset files by ARK Portal BDM.","","Component, fileFormat, resourceType, plateID, platform, targetPanel, 
targetPanelSynID, targetPanelSize, diagnosis, specimenModality, dataType, dataSubtype, visitID, program, project, biospecimenType, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","" -"assay","The technology used to generate the data in this file.","Olink Explore HT, Olink Reveal, Olink Flex, Olink Focus, Olink Target 48, Olink Target 96","","","TRUE","","","","","string" -"platform","The specific instrument (manufacturer, model, etc.) that was used to carry out a laboratory or computational experiment.","Olink Signature Q100, Fluidigm BioMark, Illumina NextSeq 500, Illumina NovaSeq 6000, unknown","","","FALSE","","","","list like error","string" -"plateID","An identifier assigned to a multi-well plate. Certain data types and assays profile samples using multi-well plates. Knowing which samples were profiled on each plate is important for establishing sample provenance, finding the right data files for a specific set of samples, as well as downstream exploratory data analysis and QC work particularly regarding identification and correction of batch effects.","","","","TRUE","","","","^^#OlinkAssayMetadataTemplate unique error^^list like error","string" -"AMP RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID, programPhase","","FALSE","","","","","" -"AMP AIM","Accelerating Medicines Partnership Autoimmune and Immune-Mediated Diseases. 
Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","" -"Component","A high-level attribute for grouping attributes into templates.","","","","True","","","","","string" -"fileFormat","Standard file format name or file extension","bai, bam, bed, bim, csv, czi, docx, dose, erate, fam, fastq, fcs, geojson, h5, h5ad, info, mcd, mtx, parquet, pdf, py, rds, rec, svs, tbi, tgz, tsv, txt, vcf, xls, xlsx, zip","","","True","","","","","string" -"publicationSynID","The synID of the corresponding Synapse entity that stores metadata about the publication. This is used to differentiate publication-specific files, often consisting of level 4 processed data and expanded subject metadata, in a publication dataset that also includes raw or minimally processed files from experimental datasets. This provides an easy way to distinguish and select for the publication-specific data from which the research findings were derived. When this attribute is used to annotate a Dataset it serves as a way to directly link the Dataset entity with the publication metadata stored in Synapse.","","","","False","","","","regex search ^syn[0-9]{8} error","string" -"associatedDataset","The synID of a Dataset entity. This serves to link other Synapse entities to Dataset entities. When used to annotate a publication Dataset this attribute should include the synID for an experimental Datasets from which the publication data was derived. 
Multiple synID can be specified using a comma-delimited list.","","","","False","","","","list like::regex search ^syn[0-9]{8} error","string" -"program","Name of the funding program that supported the generation of data and associated files","AMP AIM, AMP RA/SLE, Community Contribution","","","True","","","","","string" -"project","A sub-level attribute of `program` specifying a research initiative working to investigate particular hypotheses.","AIM for RA, ELLIPSS, LOCKIT, RA, SLE, STAMP, UMass V-CoRT","","","True","","","","list like error","string" -"programPhase","A label noting which AMP RA/SLE program phase generated the data.","I, II","","","True","","","","list like error","string" -"resourceType","High-level classification of the file content","code, experimental data, figure, metadata","","","True","","","","","string" -"dataType","High-level classification of the type of data contained in the file, loosely related to the experimental method or biological entity that is being profiled. Select all that apply using a comma-delimited list, though in most cases only a single label is expected. For multimodal datasets with concomitant profiling of biospecimen include 'multimodal'.","cytometry, epigenomics, genomics, histology, immune repertoire profiling, immunostaining, lipidomics, metabolomics, microbiome, multimodal, proteomics, transcriptomics","","","True","","","","list like error","string" -"dataSubtype","General classification to differentiate between omics profiling modalities. If N/A please select 'none'. Multiple selections can be provided in a comma-delimited list, however this is largely only expected in the context of Datasets and files that contain integrated experimental data spanning multiple types.","bulk, none, pseudobulk, single-cell, single-nucleus, spatial","","","True","","","","list like error","string" -"dataLevel","Level of data processing applied to file. 
Levels refer to pre-defined standards of processing for the given assay.","1, 2, 3, 4, 5","","","True","","","","","string" -"10xProbeSetReference","Name of probe set used in 10x Chromium Flex, Xenium, or Visium data. If custom modified probe set was used the probe reference should be included as metadata accompanying the experimental data files.","Flex Human Transcriptome Probe Set v1.0.1, Flex Human Transcriptome Probe Set v1.1.0, Visium Human Transcriptome v1, Visium Human Transcriptome v2, custom probe set","","","True","","","","","string" -"custom10xProbeSetSynID","If custom modified probe sets were used for collecting 10x Chromium Flex scRNA-seq data, then the probe reference files should be grouped as a zip or tar archive and uploaded as metadata. This attribute links the experimental data back to the probe set archive file via a synapse ID.","","","","True","","","","regex search ^syn[0-9]{8} error","string" -"specimenModality","Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens","multispecimen, single specimen, unknown","","","True","","","","","string" -"individualID","Unique identifier assigned to each study participant. For multi-specimen data files provide all IDs in a comma-separated list.","","","","True","","","","#ClinicalMetadataTemplate unique error^^#BiospecimenMetadataTemplate str^^list like error","string" -"biospecimenID","A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.","","","","True","","","","#BiospecimenMetadataTemplate unique error^^list like error","string" -"parentBiospecimenID","The biospecimenID associated with the originating biospecimen for derived or child biospecimens.","","","","False","","","","","string" -"biospecimenType","A label indicating the biological material collected for experimentation and data collection. 
Where applicable, provide all types in a comma-separated list.","PBMCs, cell line, fibroblast-like synoviocyte, kidney biopsy, none, plasma, primary cell culture, saliva, salivary gland, serum, skin biopsy, skin swab, stool, suction blister cells, suction blister fluid, synovial fluid, synovial tissue, total leukocytes, urine, uvea, whole blood","","","True","","","","#BiospecimenMetadataTemplate str^^#InVitroBiospecimenMetadataTemplate^^list like error","string" -"primaryCellSource","A label indicating the biological source material from which a primary cell culture was derived.","PBMCs, kidney, pannus-derived dermis, pannus-derived epidermis, salivary gland, synovial tissue, total leukocytes, urine, uvea, whole blood","","","True","","","","","string" -"metadataType","A label further classifying the content of metadata resource.","assay, biospecimen, cell coordinates, data dictionary, file manifest, medication, other, phenotype, protocol, single-cell metadata, target panel, template, tissue microarray map, tissue multiarray map, user manual","","","True","","","","","string" -"codingLanguage","The coding, aka programming, language(s) contained in the file marked with `resourceType = code`. 
Select all that apply.","C, C#, C++, Fortan, Java, Julia, Matlab, Python, R, Ruby, SAS","","","True","","","","","string" -"visitID","Ordinal ID distinguishing different patient visits.","","","","True","","","","","string" -"softwareAndVersion","Relevant software and version used to generate the data file.","BD FACSDiva 8.0.1, Cell Ranger 9.0.1, Cell Ranger ATAC v1.1.0, Cell Ranger v3.0.0, Cell Ranger v3.0.1, Cell Ranger v3.0.2, Cell Ranger v3.1.0, Cell Ranger v4.0.0, Cell Ranger v5.0.0, Cell Ranger v5.0.1, Cell Ranger v6.0.0, Cell Ranger v6.0.1, Cell Ranger v6.0.2, Cell Ranger v6.1.0, Cell Ranger v6.1.1, Cell Ranger v6.1.2, Cell Ranger v7.0.0, Cell Ranger v7.0.1, Cell Ranger v7.1.0, Cell Ranger v7.2.0, Cell Ranger v8.0.0, Cell Ranger v8.0.1, Cell Ranger v9.0.0, Space Ranger 3.0.0, Space Ranger 3.0.1, Space Ranger 3.1.0, Space Ranger 3.1.1, Space Ranger 3.1.2, Space Ranger 3.1.3, demuxlet","","","False","","","","","string" -"targetPanel","A unique or established human-readable name assigned to the panel of targets profiled in the experiment. For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.","","","","True","","","","","string" -"targetPanelSynID","In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. This attribute links experimental data files to the target panel metadata via the synapse ID of that file.","","","","True","","","","regex search ^syn[0-9]{8} error","string" -"targetPanelSize","The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. 
The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).","","","","False","","","","int error","integer" -"nucleicAcidSource","The source of the nucleic acid used as input for sequencing library fragments. Select all that apply, though in most cases only a single label is expected.","BCR mRNA, CRISPR protospacer feature barcode, TCR mRNA, Tn5-accessible gDNA, antigen capture barcode, gDNA, globin-depleted RNA, intracellular protein feature barcode, multiplexing oligo, poly(A) RNA, rRNA-depleted RNA, surface protein feature barcode","","","True","","","","list like error","string" -"biospecimenSubtype","Biospecimen status before sample is processed into a scRNA-seq library. Several scRNA-seq technologies support a variety of sample processing methods which can introduces sources of technical variation.","FFPE tissue, PFA-fixed tissue, cell or tissue lysate, cell suspension, flow-sorted cells, fresh tissue, frozen tissue, nuclei suspension, supernatant","","","False","","","","","string" -"cellRangerOutput","10x Genomics Cell Ranger software output several different counts results and formats, some with different processing applied. This label distinguishes between these types and is particularly helpful when multiple files are uploaded with the sample name, e.g., barcodes.tsv.gz","Not Applicable, filtered MEX, filtered_feature_bc_matrix, filtered_peak_bc_matrix, raw MEX, raw_feature_bc_matrix, raw_peak_bc_matrix","","","True","","","","","string" -"alignmentReference","The genomic/transcriptomic reference used for performing read alignment against.","10x Cell Ranger Human GRCh38 2020-A, 10x Cell Ranger Human GRCh38 2024-A, GRCh38, modified GRCh38, unknown, vdj_GRCh38_alts_ensembl-4.0.0","","","True","","","","","string" -"libraryPrepMethod","Sequencing library preparation method or kit used to create the library. 
If no commercially available kit was used, please select 'in-house library prep'.","10x Chromium Fixed RNA Human Transcriptome, 10x Chromium GEM-X Single Cell 3' v4, 10x Chromium GEM-X Single Cell 5' v3, 10x Chromium Next GEM Single Cell 3', 10x Chromium Next GEM Single Cell 3' 3.1, 10x Chromium Next GEM Single Cell 5' v1.1, 10x Chromium Next GEM Single Cell 5' v2, 10x Chromium Next GEM Single Cell ATAC v2, 10x Chromium Single Cell Human BCR, 10x Chromium Single Cell Human TCR, 10x GEM-X Flex Gene Expression Human, 10x GEM-X Universal 5' Gene Expression v3, CEL-Seq2, Chromium Next GEM Single Cell ATAC v1.1, Fluidigm C1 HT, NEBNext Human Immune Sequencing Kit, NEBNext Ultra II Directional RNA Library, Nextera XT, Nextera XT DNA, QIAseq miRNA Library, SMART-Seq Human BCR with UMI, SMART-Seq Human TCR with UMI, SMART-Seq v4 Ultra Low Input RNA, SMARTer Stranded Total RNA v2, Takara Human BCR profiling for Illumina, Takara Human TCR profiling for Illumina, Takara Human TCRv2 profiling for Illumina, Takara Human scTCR profiling for Illumina, TruSeq Stranded mRNA, custom DASH-treatment, in-house library prep","","","True","","","","","string" -"datasetType","High-level classification of dataset entity distinguishing between datasets compiled for a specific publication or as a general data resource.","experimental, publication","","","True","","","","","string" -"acknowledgmentStatement","A Dataset-specific attribute specifying the path to the wiki subpage within the ARK Portal - backend project that contains the acknowledgement statement that must be included in publications using data from the given dataset as a stipulation of the conditions of use.","syn26710600/wiki/619685","","","True","","","","","string" -"datasetDescription","A Dataset-specific attribute specifying the synID of the folder that contains a wiki write-up of the dataset description. 
This wiki content will be surfaced on the ARK Portal frontend site.","","","","True","","","","regex search ^syn[0-9]{8} error","string" -"ARKRelease","A Dataset-specific attribute specifying the ARK Portal release in which this dataset was first made available to the public.","1.0, 2.0, 2024.06.R1, 2024.07.R1, 2024.08.R1, 2024.09.R1, 2024.10.R1, 2024.12.R1, 2025.01.R1, 2025.02.R1, 2025.03.R1, 2025.04.R1, 2025.05.R1, 2025.06.R1, 2025.07.R1, 2025.08.R1, 2025.09.R1, 2025.10.R1, 2025.11.R1, 2025.12.R1","","","True","","","","","string" -"RObjectClass","Rds files store R objects, one per file. This label details the class of the R object saved to the Rds file or other similar file types.","ROCR prediction.object, Seurat object, SummarizedExperiment, Symphony reference, data.frame, list, matrix, sparse matrix, vector","","","False","","","","","string" -"diagnosis","A high-level classifier indicating the disease status of an individual.","At-Risk RA, Not Applicable, OA, RA, SLE, Sjogren's disease, control, cutaneous lupus erythematosus, dermatomyositis, discoid lupus erythematosus, lupus nephritis, psoriasis, psoriatic arthritis, scleroderma, unknown, vitiligo","","","True","","","","list like error","string" -"DOID","Disease ontology identifier associated with `diagnosis`. Attribute values are applied by ARK Portal data managers.","","","","True","","","","regex search ^DOID error","string" -"libraryID","A library label or name, unique within an experiment, used to distinguish sequencing libraries.","","","","True","","","","","string" -"associatedCodeURL","A URL to the repository where associated code is available.","","","","False","","","","","string" -"FMAID","Functional Model of Anatomy ontology ID corresponding to the anatomical site and origin of the biospecimen. 
This attribute is assigned by ARK Portal data managers.","","","","True","","","https://bioportal.bioontology.org/ontologies/FMA/?p=summary","int error","integer" -"BRENDA","BRENDA Tissue and Enzyme Source Ontology ID (BTO) corresponding to the `biospecimenType`. Attribute values are applied by ARK Portal data managers.","","","","True","","","https://bioportal.bioontology.org/ontologies/BTO","regex search ^BTO error","string" -"skinSiteStatus","Disease manifestation status of skin biospecimen.","healthy control, lesional, lesional proximal, non-lesional","","","True","","","","","string" -"salivaCollectionProcedure","Classification of saliva collection procedure. This is a conditionally dependent attribute triggered for saliva `biospecimenType`.","stimulated, unstimulated","","","True","","","","","string" -"synovialCollectionProcedure","Classification of procedure for synovial tissue collection.","arthroplasty, biopsy, synovectomy, unknown","","","True","","","","","string" -"publicationType","General classification of publication.","correction, peer-reviewed, pre-print","","","True","","","","","string" -"title","Title of the publication.","","","","True","","","","","string" -"journal","Journal in which the publication was released","","","","True","","","","","string" -"year","Year (YYYY) in which the paper was published.","","","","True","","","","regex search [1-2][0-9]{3} error","string" -"publicationDate","The publication date extracted from PubMed database","","","","True","","","","","string" -"PMID","PubMed(R) Identifier","","","","True","","","","regex search ^PMID error","string" -"DOI","Digital object identifier","","","","True","","","","","string" -"anatomicalSite","The anatomical site, i.e., location on or within the body, from which the biospecimen was collected. 
This attribute is required depending on the value selected for biospecimenType.","left 2nd MCP joint, left ankle joint, left hip joint, left knee joint, left wrist joint, other site, right 1st MTP joint, right 2nd MCP joint, right 2nd MTP joint, right 3rd MCP joint, right ankle joint, right hip joint, right knee joint, right wrist joint, unknown","","","True","","","","","string" -"vitiligoPattern","A high-level classification of vitiligo based on the distribution and patterning of lesions across the body.","mixed, non-segmental, segmental, unclassified","","","False","","","","","string" -"vitiligoPhenotype","Classification of vitiligo lesions which correlate with autoimmune activity and result in specific skin and depigmentation manifestations at the lesion site. If N/A please select 'none'. Multiple selections can be provided in a comma-delimited list.","active, confetti, inflammatory, none, trichrome","","","True","","","","list like error","string" -"psoriasisType","General type classification of psoriasis disease manifestation.","erythrodermic, guttate, inverse, plaque, pustular","","","False","","","","","string" -"ageDiagnosis","Age at which subject was diagnosed with `diagnosis`. If providing this value be sure the unit matches that used for `age`.","","","","False","","","","num error","number" -"age","Age at which subject was enrolled in study or age at corresponding visit and data collection event. 
If value unknown, enter '-1'.","","","","True","","","","num error","number" -"ageUnits","The unit of measure used for `ageEnrollment` and `ageDiagnosis`","months, years","","","True","","","","","string" -"sex","A textual description of a person's sex at birth.","female, intersex, male, unknown","","","True","","","","","string" -"height","Standing height of subject.","","","","True","","","","num error","number" -"race","A textual description of a person's race.","American Indian or Alaska Native, Asian, Black or African American, Hispanic, Mixed Race, Native Hawaiian or Other Pacific Islander, White, other, unknown","","","False","","","","","string" -"ethnicity","The ethnicity of a person.","Hispanic or Latino, Not Hispanic or Latino, unknown","","","False","","","","","string" -"heightUnits","Unit of measure of value provided for `height`.","centimeters, feet, inches, meters","","","True","","","","","string" -"weight","Weight of subject. If value unknown, enter '-1'.","","","","True","","","","num error","number" -"weightUnits","Abbreviated unit of measure of value provided for `weight`.","g, kg, lb, oz","","","True","","","","","string" -"comorbidities","Any diseases or medical condition that is simultaneously present in addition to `diagnosis`.","Hashimoto's Thyroiditis, autoimmune thyroid disease, cardiovascular disease, diabetes, inflammatory bowel disease, multiple sclerosis, other, psoriasis, psoriatic arthritis, pulmonary disease, rheumatoid arthritis, systemic lupus erythematosus","","","False","","","","list like error","string" -"diabetesType","Type of diabetes mellitus.","gestational, type 1, type 2, unknown","","","False","","","","","string" -"VIDA","Vitiligo Disease Activity Score, a six-point scale that evaluates the activity of vitiligo","","","","False","","","","","string" -"VASI","Total body Vitiligo Area Severity Index, a measure of the percentage of vitiligo involvement across the body calculated in terms of hand 
units.","","","","False","","","","","string" -"VETI","Vitiligo Extent Tensity Index measures the extent of vitiligo by a numerical score and combines analysis of extensity and severity of vitiligo to produce a constant and reproducible number.","","","","False","","","https://doi.org/10.5826/dpc.0404a18","num error","number" -"PASI","Psoriasis Area and Severity Index, is a measurement of the discoloration, thickness, scaling, and coverage of psoriasis plaques.","","","","False","","","","","string" -"CDASI","Cutaneous Dermatomyositis Disease Area and Severity Index, is a measurement used to characterize cutaneous dermatomyositis severity.","","","","False","","","","","string" -"percentCellViability","A measure of the proportion of viable cells within a cell suspension. Scale is 0-100.","","","","True","","","","inRange 50 100 error","integer" -"inputCellCount","An estimate of the number of cells expected to be sequenced in a library. Software that process single-cell sequencing data often include options for users to specify this value to improve processing results.","","","","True","","","","int error","integer" -"totalReads","Total number of reads sequenced from the library.","","","","True","","","","int error","integer" -"sequencingSaturation","A measure of the fraction of library complexity that was sequenced in a library. This metric quantifies the fraction of reads originating from an already-observed UMI. More specifically, this is the fraction of confidently mapped, valid cell-barcode, valid UMI reads that are non-unique. 
Scale is 0-1.","","","","False","","","https://kb.10xgenomics.com/hc/en-us/articles/115005062366-What-is-sequencing-saturation","inRange 0 1 error","number" -"ImmPortAccession","Accession to corresponding information in ImmPort.","","","","False","","","","regex search ^SDY error","string" -"FACSPopulation","A description of the marker gating strategy used to derive the population cells with FACS.","","","","True","","","","","string" -"cellOntologyID","Cell Ontology CL identifier that best describes a biopsecimen used to generate data, e.g., CL:0000084 for T cell.","","","","False","","","","regex search ^CL: error","string" -"cellType","The cell type name from Cell Ontology for the corresponding CL identifier.","","","","False","","","","","string" -"userDefinedCellType","User-defined label of a cell type. Contributors are provided this option to use preferred or custom cell type labels that may vary from options and standards set by Cell Ontology.","","","","False","","","","","string" -"krennInflammatory","A standardized, semi-quantitative measure of the degree of inflammatory infiltrate in synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"krennLining","A standardized, semi-quantitative measure of the degree of hyperplasia/enlargement of the synovial lining layer of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. 
If value unknown, use '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"krennStroma","A standardized, semi-quantitative measure of stromal cell density of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"krennSynovitisScore","The Krenn Synovitis Score (KSS) is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"datasetStatus","A categorical label indicating the status of an ARK Portal dataset. 
This is applied to improve downstream management of datasets as well as various ETL workflows.","deprecated, released, test, under peer review, unreleased","","","True","","","","","string" -"sampleProcessingBatch","A label indicating batching of sample processing or preparation that occurs prior to data collection.","","","","False","","","","","string" -"dataCollectionBatch","A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.","","","","False","","","","","string" -"metadataStandards","Metadata standards used to generate the metadata","ARK data model, user-defined","","","False","","","","","string" -"skinSunExposure","For skin-based biospecimen, this attribute indicates whether the sample was collected from an anatomical site that does or does not receive routine sun exposure.","not sun exposed, sun exposed","","","False","","","","","string" -"sequencingSpikeIn","Pre-made sequencing libraries may be added to your sequencing run to improve sequencing results. If such a 'spike-in' library was used, this attribute specifies the name of the library and the percentage of the spike-in library used in the sequencing run.","","","","False","","","","","string" -"librarySpikeIn","Pre-made collections of nucleic acid fragments can be added to samples during the library construction process to improve downstream quantification and statistical analyses. 
If such a 'spike-in' was used, this attribute specifies the name and manufacturer of the spike-in.","","","","False","","","","","string" -"readLength","The number of base pairs (bp) sequenced for reads in a fastq file.","","","","False","","","","int error","integer" -"PMCID","Pubmed Central Identifier, formatted as a compact URI string that will automatically resolve into a URL based on the Synapse bioregistry; e.g., pmc:PMCxxxxxxxx","","","","False","","","","regex search ^pmc:PMC[0-9]{8} error","string" -"processedDataType","A label used for file annotations to provide a brief description of the processed data file.","barcode counts, differential expression results, epigenomic peaks, gene counts","","","False","","","","list like error","string" -"eventCount","The total number of events detected and captured in the corresponding FCS file. In cytometry, an event corresponds to particles detected and measured by the instrument. This number corresponds to the number of rows in the FCS file and is expected to be a whole integer number.","","","","False","","","","int error","integer" -"notes","Please use this attribute to captured pertinent details not otherwise captured. This is an unstructured text entry, please be concise.","","","","False","","","","","string" -"species","The genus species of sample or subject origin.","Homo sapiens","","","True","","","","","string" -"treatment","A short descriptive label indicating what experimental treatments have been applied to a biospecimen before data capture. Please include the word 'control' to indicate specific treatments that are intended to serve as a control group.","","","","True","","","","","string" -"treatmentTimepoint","Where applicable, specify the timepoint relative to treatment to distinguish different sets of samples that have undergone the same treatment but for different lengths of time. 
REQUIRED: please specify the unit of time, e.g., secs, mins, hrs, days, etc.","","","","False","","","","","string" -"slideID","A distinct label or name, unique within an experiment, assigned to an imaging slides.","","","","True","","","","","string" -"altSampleID","An alternate identifier for a sample. With some assays there can be default or alternate sample identifiers entered into the data collection software. If you will be uploading data collected with an alternate identifier please provide that here. In some cases, the ARK BDM team will use this field to capture original but out-dated identifiers for samples.","","","","False","","","","","string" -"sampleCollectionBatch","A label indicating batching of sample collection or experiment execution that occurs prior to data collection.","","","","False","","","","","string" -"associatedAccession","This is a File and Dataset annotation attribute indicating additional accessions (i.e., unique identifiers) associated with the data when the data has also been submitted to or can be found in other repositories such as GEO, SRA, dbGaP, etc.","","","","False","","","","list like error","string" +"Attribute","Description","Valid Values","DependsOn","Required","Parent","Source","Validation Rules","columnType","Format","Minimum","Maximum","Pattern" +"Olink Assay Metadata Template","A template outlining assay metadata to be collected for each plate in an Olink dataset.","","Component, assay, plateID, platform, targetPanel, targetPanelSynID, targetPanelSize","","","","","","","","","" +"Olink File Annotation Template","A template outlining metadata to be provided by contributors that is applied as a preliminary set of annotations to Olink dataset files.","","Component, fileFormat, resourceType, plateID, specimenModality","","","","","","","","","" +"BDM Olink File Annotations","A template outlining the complete set of metadata to be applied as annotations to Olink dataset files by ARK Portal BDM.","","Component, 
fileFormat, resourceType, plateID, platform, targetPanel, targetPanelSynID, targetPanelSize, diagnosis, specimenModality, dataType, dataSubtype, visitID, program, project, biospecimenType, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","","","" +"assay","The technology used to generate the data in this file.","Olink Explore HT, Olink Reveal, Olink Flex, Olink Focus, Olink Target 48, Olink Target 96","","TRUE","","","","string","","","","" +"platform","The specific instrument (manufacturer, model, etc.) that was used to carry out a laboratory or computational experiment.","Olink Signature Q100, Fluidigm BioMark, Illumina NextSeq 500, Illumina NovaSeq 6000, unknown","","FALSE","","","list like error","string_list","","","","" +"plateID","An identifier assigned to a multi-well plate. Certain data types and assays profile samples using multi-well plates. Knowing which samples were profiled on each plate is important for establishing sample provenance, finding the right data files for a specific set of samples, as well as downstream exploratory data analysis and QC work particularly regarding identification and correction of batch effects.","","","TRUE","","","^^#OlinkAssayMetadataTemplate unique error^^list like error","string_list","","","","" +"AMP RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID, programPhase","FALSE","","","","","","","","" +"AMP AIM","Accelerating Medicines Partnership Autoimmune and Immune-Mediated Diseases. 
Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","","","" +"Component","A high-level attribute for grouping attributes into templates.","","","True","","","","string","","","","" +"fileFormat","Standard file format name or file extension","bai, bam, bed, bim, csv, czi, docx, dose, erate, fam, fastq, fcs, geojson, h5, h5ad, info, mcd, mtx, parquet, pdf, py, rds, rec, svs, tbi, tgz, tsv, txt, vcf, xls, xlsx, zip","","True","","","","string","","","","" +"program","Name of the funding program that supported the generation of data and associated files","AMP AIM, AMP RA/SLE, Community Contribution","","True","","","","string","","","","" +"project","A sub-level attribute of `program` specifying a research initiative working to investigate particular hypotheses.","AIM for RA, ELLIPSS, LOCKIT, RA, SLE, STAMP, UMass V-CoRT","","True","","","list like error","string_list","","","","" +"programPhase","A label noting which AMP RA/SLE program phase generated the data.","I, II","","True","","","list like error","string_list","","","","" +"resourceType","High-level classification of the file content","code, experimental data, figure, metadata","","True","","","","string","","","","" +"dataType","High-level classification of the type of data contained in the file, loosely related to the experimental method or biological entity that is being profiled. Select all that apply using a comma-delimited list, though in most cases only a single label is expected. For multimodal datasets with concomitant profiling of biospecimen include 'multimodal'.","cytometry, epigenomics, genomics, histology, immune repertoire profiling, immunostaining, lipidomics, metabolomics, microbiome, multimodal, proteomics, transcriptomics","","True","","","list like error","string_list","","","","" +"dataSubtype","General classification to differentiate between omics profiling modalities. If N/A please select 'none'. 
Multiple selections can be provided in a comma-delimited list, however this is largely only expected in the context of Datasets and files that contain integrated experimental data spanning multiple types.","bulk, none, pseudobulk, single-cell, single-nucleus, spatial","","True","","","list like error","string_list","","","","" +"dataLevel","Level of data processing applied to file. Levels refer to pre-defined standards of processing for the given assay.","1, 2, 3, 4, 5","","True","","","","string","","","","" +"10xProbeSetReference","Name of probe set used in 10x Chromium Flex, Xenium, or Visium data. If custom modified probe set was used the probe reference should be included as metadata accompanying the experimental data files.","Flex Human Transcriptome Probe Set v1.0.1, Flex Human Transcriptome Probe Set v1.1.0, Visium Human Transcriptome v1, Visium Human Transcriptome v2, custom probe set","","True","","","","string","","","","" +"specimenModality","Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens","multispecimen, single specimen, unknown","","True","","","","string","","","","" +"individualID","Unique identifier assigned to each study participant. For multi-specimen data files provide all IDs in a comma-separated list.","","","True","","","#ClinicalMetadataTemplate unique error^^#BiospecimenMetadataTemplate str^^list like error","string_list","","","","" +"biospecimenID","A unique identifier assigned to specimens collected from study participants. 
For multi-specimen data files provide all IDs in a comma-separated list.","","","True","","","#BiospecimenMetadataTemplate unique error^^list like error","string_list","","","","" +"parentBiospecimenID","The biospecimenID associated with the originating biospecimen for derived or child biospecimens.","","","False","","","","string","","","","" +"biospecimenType","A label indicating the biological material collected for experimentation and data collection. Where applicable, provide all types in a comma-separated list.","PBMCs, cell line, fibroblast-like synoviocyte, kidney biopsy, none, plasma, primary cell culture, saliva, salivary gland, serum, skin biopsy, skin swab, stool, suction blister cells, suction blister fluid, synovial fluid, synovial tissue, total leukocytes, urine, uvea, whole blood","","True","","","#BiospecimenMetadataTemplate str^^#InVitroBiospecimenMetadataTemplate^^list like error","string_list","","","","" +"primaryCellSource","A label indicating the biological source material from which a primary cell culture was derived.","PBMCs, kidney, pannus-derived dermis, pannus-derived epidermis, salivary gland, synovial tissue, total leukocytes, urine, uvea, whole blood","","True","","","","string","","","","" +"metadataType","A label further classifying the content of metadata resource.","assay, biospecimen, cell coordinates, data dictionary, file manifest, medication, other, phenotype, protocol, single-cell metadata, target panel, template, tissue microarray map, tissue multiarray map, user manual","","True","","","","string","","","","" +"codingLanguage","The coding, aka programming, language(s) contained in the file marked with `resourceType = code`. 
Select all that apply.","C, C#, C++, Fortan, Java, Julia, Matlab, Python, R, Ruby, SAS","","True","","","","string","","","","" +"visitID","Ordinal ID distinguishing different patient visits.","","","True","","","","string","","","","" +"softwareAndVersion","Relevant software and version used to generate the data file.","BD FACSDiva 8.0.1, Cell Ranger 9.0.1, Cell Ranger ATAC v1.1.0, Cell Ranger v3.0.0, Cell Ranger v3.0.1, Cell Ranger v3.0.2, Cell Ranger v3.1.0, Cell Ranger v4.0.0, Cell Ranger v5.0.0, Cell Ranger v5.0.1, Cell Ranger v6.0.0, Cell Ranger v6.0.1, Cell Ranger v6.0.2, Cell Ranger v6.1.0, Cell Ranger v6.1.1, Cell Ranger v6.1.2, Cell Ranger v7.0.0, Cell Ranger v7.0.1, Cell Ranger v7.1.0, Cell Ranger v7.2.0, Cell Ranger v8.0.0, Cell Ranger v8.0.1, Cell Ranger v9.0.0, Space Ranger 3.0.0, Space Ranger 3.0.1, Space Ranger 3.1.0, Space Ranger 3.1.1, Space Ranger 3.1.2, Space Ranger 3.1.3, demuxlet","","False","","","","string","","","","" +"targetPanel","A unique or established human-readable name assigned to the panel of targets profiled in the experiment. For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.","","","True","","","","string","","","","" +"targetPanelSize","The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).","","","False","","","int error","integer","","","","" +"nucleicAcidSource","The source of the nucleic acid used as input for sequencing library fragments. 
Select all that apply, though in most cases only a single label is expected.","BCR mRNA, CRISPR protospacer feature barcode, TCR mRNA, Tn5-accessible gDNA, antigen capture barcode, gDNA, globin-depleted RNA, intracellular protein feature barcode, multiplexing oligo, poly(A) RNA, rRNA-depleted RNA, surface protein feature barcode","","True","","","list like error","string_list","","","","" +"biospecimenSubtype","Biospecimen status before sample is processed into a scRNA-seq library. Several scRNA-seq technologies support a variety of sample processing methods which can introduces sources of technical variation.","FFPE tissue, PFA-fixed tissue, cell or tissue lysate, cell suspension, flow-sorted cells, fresh tissue, frozen tissue, nuclei suspension, supernatant","","False","","","","string","","","","" +"cellRangerOutput","10x Genomics Cell Ranger software output several different counts results and formats, some with different processing applied. This label distinguishes between these types and is particularly helpful when multiple files are uploaded with the sample name, e.g., barcodes.tsv.gz","Not Applicable, filtered MEX, filtered_feature_bc_matrix, filtered_peak_bc_matrix, raw MEX, raw_feature_bc_matrix, raw_peak_bc_matrix","","True","","","","string","","","","" +"alignmentReference","The genomic/transcriptomic reference used for performing read alignment against.","10x Cell Ranger Human GRCh38 2020-A, 10x Cell Ranger Human GRCh38 2024-A, GRCh38, modified GRCh38, unknown, vdj_GRCh38_alts_ensembl-4.0.0","","True","","","","string","","","","" +"libraryPrepMethod","Sequencing library preparation method or kit used to create the library. 
If no commercially available kit was used, please select 'in-house library prep'.","10x Chromium Fixed RNA Human Transcriptome, 10x Chromium GEM-X Single Cell 3' v4, 10x Chromium GEM-X Single Cell 5' v3, 10x Chromium Next GEM Single Cell 3', 10x Chromium Next GEM Single Cell 3' 3.1, 10x Chromium Next GEM Single Cell 5' v1.1, 10x Chromium Next GEM Single Cell 5' v2, 10x Chromium Next GEM Single Cell ATAC v2, 10x Chromium Single Cell Human BCR, 10x Chromium Single Cell Human TCR, 10x GEM-X Flex Gene Expression Human, 10x GEM-X Universal 5' Gene Expression v3, CEL-Seq2, Chromium Next GEM Single Cell ATAC v1.1, Fluidigm C1 HT, NEBNext Human Immune Sequencing Kit, NEBNext Ultra II Directional RNA Library, Nextera XT, Nextera XT DNA, QIAseq miRNA Library, SMART-Seq Human BCR with UMI, SMART-Seq Human TCR with UMI, SMART-Seq v4 Ultra Low Input RNA, SMARTer Stranded Total RNA v2, Takara Human BCR profiling for Illumina, Takara Human TCR profiling for Illumina, Takara Human TCRv2 profiling for Illumina, Takara Human scTCR profiling for Illumina, TruSeq Stranded mRNA, custom DASH-treatment, in-house library prep","","True","","","","string","","","","" +"datasetType","High-level classification of dataset entity distinguishing between datasets compiled for a specific publication or as a general data resource.","experimental, publication","","True","","","","string","","","","" +"acknowledgmentStatement","A Dataset-specific attribute specifying the path to the wiki subpage within the ARK Portal - backend project that contains the acknowledgement statement that must be included in publications using data from the given dataset as a stipulation of the conditions of use.","syn26710600/wiki/619685","","True","","","","string","","","","" +"ARKRelease","A Dataset-specific attribute specifying the ARK Portal release in which this dataset was first made available to the public.","1.0, 2.0, 2024.06.R1, 2024.07.R1, 2024.08.R1, 2024.09.R1, 2024.10.R1, 2024.12.R1, 2025.01.R1, 2025.02.R1, 
2025.03.R1, 2025.04.R1, 2025.05.R1, 2025.06.R1, 2025.07.R1, 2025.08.R1, 2025.09.R1, 2025.10.R1, 2025.11.R1, 2025.12.R1","","True","","","","string","","","","" +"RObjectClass","Rds files store R objects, one per file. This label details the class of the R object saved to the Rds file or other similar file types.","ROCR prediction.object, Seurat object, SummarizedExperiment, Symphony reference, data.frame, list, matrix, sparse matrix, vector","","False","","","","string","","","","" +"diagnosis","A high-level classifier indicating the disease status of an individual.","At-Risk RA, Not Applicable, OA, RA, SLE, Sjogren's disease, control, cutaneous lupus erythematosus, dermatomyositis, discoid lupus erythematosus, lupus nephritis, psoriasis, psoriatic arthritis, scleroderma, unknown, vitiligo","","True","","","list like error","string_list","","","","" +"libraryID","A library label or name, unique within an experiment, used to distinguish sequencing libraries.","","","True","","","","string","","","","" +"associatedCodeURL","A URL to the repository where associated code is available.","","","False","","","","string","uri","","","" +"FMAID","Functional Model of Anatomy ontology ID corresponding to the anatomical site and origin of the biospecimen. This attribute is assigned by ARK Portal data managers.","","","True","","https://bioportal.bioontology.org/ontologies/FMA/?p=summary","int error","integer","","","","" +"skinSiteStatus","Disease manifestation status of skin biospecimen.","healthy control, lesional, lesional proximal, non-lesional","","True","","","","string","","","","" +"salivaCollectionProcedure","Classification of saliva collection procedure. 
This is a conditionally dependent attribute triggered for saliva `biospecimenType`.","stimulated, unstimulated","","True","","","","string","","","","" +"synovialCollectionProcedure","Classification of procedure for synovial tissue collection.","arthroplasty, biopsy, synovectomy, unknown","","True","","","","string","","","","" +"publicationType","General classification of publication.","correction, peer-reviewed, pre-print","","True","","","","string","","","","" +"title","Title of the publication.","","","True","","","","string","","","","" +"journal","Journal in which the publication was released","","","True","","","","string","","","","" +"publicationDate","The publication date extracted from PubMed database","","","True","","","","string","","","","" +"DOI","Digital object identifier","","","True","","","","string","uri-reference","","","" +"anatomicalSite","The anatomical site, i.e., location on or within the body, from which the biospecimen was collected. This attribute is required depending on the value selected for biospecimenType.","left 2nd MCP joint, left ankle joint, left hip joint, left knee joint, left wrist joint, other site, right 1st MTP joint, right 2nd MCP joint, right 2nd MTP joint, right 3rd MCP joint, right ankle joint, right hip joint, right knee joint, right wrist joint, unknown","","True","","","","string","","","","" +"vitiligoPattern","A high-level classification of vitiligo based on the distribution and patterning of lesions across the body.","mixed, non-segmental, segmental, unclassified","","False","","","","string","","","","" +"vitiligoPhenotype","Classification of vitiligo lesions which correlate with autoimmune activity and result in specific skin and depigmentation manifestations at the lesion site. If N/A please select 'none'. 
Multiple selections can be provided in a comma-delimited list.","active, confetti, inflammatory, none, trichrome","","True","","","list like error","string_list","","","","" +"psoriasisType","General type classification of psoriasis disease manifestation.","erythrodermic, guttate, inverse, plaque, pustular","","False","","","","string","","","","" +"ageDiagnosis","Age at which subject was diagnosed with `diagnosis`. If providing this value be sure the unit matches that used for `age`.","","","False","","","num error","number","","","","" +"age","Age at which subject was enrolled in study or age at corresponding visit and data collection event. If value unknown, enter '-1'.","","","True","","","num error","number","","","","" +"ageUnits","The unit of measure used for `age` and `ageDiagnosis`","months, years","","True","","","","string","","","","" +"sex","A textual description of a person's sex at birth.","female, intersex, male, unknown","","True","","","","string","","","","" +"height","Standing height of subject.","","","True","","","num error","number","","","","" +"race","A textual description of a person's race.","American Indian or Alaska Native, Asian, Black or African American, Hispanic, Mixed Race, Native Hawaiian or Other Pacific Islander, White, other, unknown","","False","","","","string","","","","" +"ethnicity","The ethnicity of a person.","Hispanic or Latino, Not Hispanic or Latino, unknown","","False","","","","string","","","","" +"heightUnits","Unit of measure of value provided for `height`.","centimeters, feet, inches, meters","","True","","","","string","","","","" +"weight","Weight of subject. 
If value unknown, enter '-1'.","","","True","","","num error","number","","","","" +"weightUnits","Abbreviated unit of measure of value provided for `weight`.","g, kg, lb, oz","","True","","","","string","","","","" +"comorbidities","Any diseases or medical condition that is simultaneously present in addition to `diagnosis`.","Hashimoto's Thyroiditis, autoimmune thyroid disease, cardiovascular disease, diabetes, inflammatory bowel disease, multiple sclerosis, other, psoriasis, psoriatic arthritis, pulmonary disease, rheumatoid arthritis, systemic lupus erythematosus","","False","","","list like error","string_list","","","","" +"diabetesType","Type of diabetes mellitus.","gestational, type 1, type 2, unknown","","False","","","","string","","","","" +"VIDA","Vitiligo Disease Activity Score, a six-point scale that evaluates the activity of vitiligo","","","False","","","","string","","","","" +"VASI","Total body Vitiligo Area Severity Index, a measure of the percentage of vitiligo involvement across the body calculated in terms of hand units.","","","False","","","","string","","","","" +"VETI","Vitiligo Extent Tensity Index measures the extent of vitiligo by a numerical score and combines analysis of extensity and severity of vitiligo to produce a constant and reproducible number.","","","False","","https://doi.org/10.5826/dpc.0404a18","num error","number","","","","" +"PASI","Psoriasis Area and Severity Index, is a measurement of the discoloration, thickness, scaling, and coverage of psoriasis plaques.","","","False","","","","string","","","","" +"CDASI","Cutaneous Dermatomyositis Disease Area and Severity Index, is a measurement used to characterize cutaneous dermatomyositis severity.","","","False","","","","string","","","","" +"inputCellCount","An estimate of the number of cells expected to be sequenced in a library. 
Software that processes single-cell sequencing data often includes options for users to specify this value to improve processing results.","","","True","","","int error","integer","","","","" +"totalReads","Total number of reads sequenced from the library.","","","True","","","int error","integer","","","","" +"FACSPopulation","A description of the marker gating strategy used to derive the population cells with FACS.","","","True","","","","string","","","","" +"cellType","The cell type name from Cell Ontology for the corresponding CL identifier.","","","False","","","","string","","","","" +"userDefinedCellType","User-defined label of a cell type. Contributors are provided this option to use preferred or custom cell type labels that may vary from options and standards set by Cell Ontology.","","","False","","","","string","","","","" +"krennInflammatory","A standardized, semi-quantitative measure of the degree of inflammatory infiltrate in synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"krennLining","A standardized, semi-quantitative measure of the degree of hyperplasia/enlargement of the synovial lining layer of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"krennStroma","A standardized, semi-quantitative measure of stromal cell density of synovial specimen.
This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"krennSynovitisScore","The Krenn Synovitis Score (KSS) is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"datasetStatus","A categorical label indicating the status of an ARK Portal dataset. This is applied to improve downstream management of datasets as well as various ETL workflows.","deprecated, released, test, under peer review, unreleased","","True","","","","string","","","","" +"sampleProcessingBatch","A label indicating batching of sample processing or preparation that occurs prior to data collection.","","","False","","","","string","","","","" +"dataCollectionBatch","A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.","","","False","","","","string","","","","" +"metadataStandards","Metadata standards used to generate the metadata","ARK data model, user-defined","","False","","","","string","","","","" +"skinSunExposure","For skin-based biospecimen, this attribute indicates whether the sample was collected from an anatomical site that does or does not receive routine sun exposure.","not sun exposed, sun exposed","","False","","","","string","","","","" +"sequencingSpikeIn","Pre-made sequencing libraries may be added to your sequencing run to improve sequencing results. 
If such a 'spike-in' library was used, this attribute specifies the name of the library and the percentage of the spike-in library used in the sequencing run.","","","False","","","","string","","","","" +"librarySpikeIn","Pre-made collections of nucleic acid fragments can be added to samples during the library construction process to improve downstream quantification and statistical analyses. If such a 'spike-in' was used, this attribute specifies the name and manufacturer of the spike-in.","","","False","","","","string","","","","" +"readLength","The number of base pairs (bp) sequenced for reads in a fastq file.","","","False","","","int error","integer","","","","" +"processedDataType","A label used for file annotations to provide a brief description of the processed data file.","barcode counts, differential expression results, epigenomic peaks, gene counts","","False","","","list like error","string_list","","","","" +"eventCount","The total number of events detected and captured in the corresponding FCS file. In cytometry, an event corresponds to particles detected and measured by the instrument. This number corresponds to the number of rows in the FCS file and is expected to be a whole integer number.","","","False","","","int error","integer","","","","" +"notes","Please use this attribute to capture pertinent details not otherwise captured. This is an unstructured text entry, please be concise.","","","False","","","","string","","","","" +"species","The genus species of sample or subject origin.","Homo sapiens","","True","","","","string","","","","" +"treatment","A short descriptive label indicating what experimental treatments have been applied to a biospecimen before data capture.
Please include the word 'control' to indicate specific treatments that are intended to serve as a control group.","","","True","","","","string","","","","" +"treatmentTimepoint","Where applicable, specify the timepoint relative to treatment to distinguish different sets of samples that have undergone the same treatment but for different lengths of time. REQUIRED: please specify the unit of time, e.g., secs, mins, hrs, days, etc.","","","False","","","","string","","","","" +"slideID","A distinct label or name, unique within an experiment, assigned to an imaging slide.","","","True","","","","string","","","","" +"altSampleID","An alternate identifier for a sample. With some assays there can be default or alternate sample identifiers entered into the data collection software. If you will be uploading data collected with an alternate identifier please provide that here. In some cases, the ARK BDM team will use this field to capture original but out-dated identifiers for samples.","","","False","","","","string","","","","" +"sampleCollectionBatch","A label indicating batching of sample collection or experiment execution that occurs prior to data collection.","","","False","","","","string","","","","" +"associatedAccession","This is a File and Dataset annotation attribute indicating additional accessions (i.e., unique identifiers) associated with the data when the data has also been submitted to or can be found in other repositories such as GEO, SRA, dbGaP, etc.","","","False","","","list like error","string_list","","","","" +"percentCellViability","A measure of the proportion of viable cells within a cell suspension. Scale is 0-100.","","","True","","","inRange 50 100 error","integer","","50","100","" +"sequencingSaturation","A measure of the fraction of library complexity that was sequenced in a library. This metric quantifies the fraction of reads originating from an already-observed UMI.
More specifically, this is the fraction of confidently mapped, valid cell-barcode, valid UMI reads that are non-unique. Scale is 0-1.","","","False","","https://kb.10xgenomics.com/hc/en-us/articles/115005062366-What-is-sequencing-saturation","inRange 0 1 error","number","","0","1","" +"publicationSynID","The synID of the corresponding Synapse entity that stores metadata about the publication. This is used to differentiate publication-specific files, often consisting of level 4 processed data and expanded subject metadata, in a publication dataset that also includes raw or minimally processed files from experimental datasets. This provides an easy way to distinguish and select for the publication-specific data from which the research findings were derived. When this attribute is used to annotate a Dataset it serves as a way to directly link the Dataset entity with the publication metadata stored in Synapse.","","","False","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"associatedDataset","The synID of a Dataset entity. This serves to link other Synapse entities to Dataset entities. When used to annotate a publication Dataset this attribute should include the synID for an experimental Datasets from which the publication data was derived. Multiple synID can be specified using a comma-delimited list.","","","False","","","list like::regex search ^syn[0-9]{8} error","string_list","","","","^syn[0-9]{8}" +"custom10xProbeSetSynID","If custom modified probe sets were used for collecting 10x Chromium Flex scRNA-seq data, then the probe reference files should be grouped as a zip or tar archive and uploaded as metadata. 
This attribute links the experimental data back to the probe set archive file via a synapse ID.","","","True","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"targetPanelSynID","In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. This attribute links experimental data files to the target panel metadata via the synapse ID of that file.","","","True","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"datasetDescription","A Dataset-specific attribute specifying the synID of the folder that contains a wiki write-up of the dataset description. This wiki content will be surfaced on the ARK Portal frontend site.","","","True","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"DOID","Disease ontology identifier associated with `diagnosis`. Attribute values are applied by ARK Portal data managers.","","","True","","","regex search ^DOID error","string","","","","^DOID" +"BRENDA","BRENDA Tissue and Enzyme Source Ontology ID (BTO) corresponding to the `biospecimenType`. 
Attribute values are applied by ARK Portal data managers.","","","True","","https://bioportal.bioontology.org/ontologies/BTO","regex search ^BTO error","string","","","","^BTO" +"year","Year (YYYY) in which the paper was published.","","","True","","","regex search [1-2][0-9]{3} error","string","","","","[1-2][0-9]{3}" +"PMID","PubMed(R) Identifier","","","True","","","regex search ^PMID error","string","","","","^PMID" +"ImmPortAccession","Accession to corresponding information in ImmPort.","","","False","","","regex search ^SDY error","string","","","","^SDY" +"cellOntologyID","Cell Ontology CL identifier that best describes a biospecimen used to generate data, e.g., CL:0000084 for T cell.","","","False","","","regex search ^CL: error","string","","","","^CL:" +"PMCID","PubMed Central Identifier, formatted as a compact URI string that will automatically resolve into a URL based on the Synapse bioregistry; e.g., pmc:PMCxxxxxxxx","","","False","","","regex search ^pmc:PMC[0-9]{8} error","string","","","","^pmc:PMC[0-9]{8}" diff --git a/model_contexts/singlecell/ark.singlecell_context.csv b/model_contexts/singlecell/ark.singlecell_context.csv index 69df702a..b3e3b7d5 100644 --- a/model_contexts/singlecell/ark.singlecell_context.csv +++ b/model_contexts/singlecell/ark.singlecell_context.csv @@ -1,19 +1,19 @@ -"Attribute","Description","Valid Values","DependsOn","Properties","Required","Parent","DependsOn Component","Source","Validation Rules","columnType" -"single specimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","biospecimenID, individualID","","","","","","","" -"resourceType","","experimental data, metadata","","","TRUE","","","","list like error","string" -"fileFormat","Standard file format name or file extension","csv, tsv, txt, xlsx, xls, fam, bim, bed, bam, h5, mtx, bai, rds, tgz, zip, h5ad","","","TRUE","","","","","string" -"feature barcode sequencing","Is a valid value of `assay` that
triggers conditional dependencies for additional attributes.","","targetPanel, targetPanelSynID, targetPanelSize","","","","","","","" -"Rds","Is a valid value of 'fileFormat' that triggers conditional dependencies for additional attributes.","","RObjectClass","","","","","","","" -"experimental data","Is a valid value of 'resourceType' that triggers conditional dependencies for additional attributes.","","processedDataType","","","","","","","" -"metadata","Any file that contains curated data describing an experiment and experimental-derived data, including metadata about study subjects, biospecimens, protocols, assay reagents, marker panels, and ID mappings. Is a valid value of Resource Type that triggers conditional dependencies for additional attributes.","","metadataType","","FALSE","","","","","" -"metadataType","","single-cell metadata","","","TRUE","","","","","string" -"scRNASeq Processed Data Annotation Template","A data contributor template outlining metadata to be collected as file annotations for scRNA-seq processed data files (i.e., anything not a fastq file).","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality, cellRangerOutput","","","","","","","" -"BDM scRNASeq Processed Data Annotations","A template outlining metadata to be collected as file annotations for scRNA-seq processed data files (i.e., anything not a fastq file). 
to be compiled by ARK BDM.","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality, cellRangerOutput, parentBiospecimenID, biospecimenType, biospecimenSubtype, dataType, dataSubtype, diagnosis, visitID, libraryPrepMethod, nucleicAcidSource, alignmentReference, softwareAndVersion, program, project, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","" -"snRNASeq Processed Data Annotation Template","A data contributor template outlining metadata to be collected as file annotations for snRNA-seq processed data files (i.e., anything not a fastq file).","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality, cellRangerOutput","","","","","","","" -"BDM snRNASeq Processed Data Annotations","A template outlining metadata to be collected as file annotations for snRNA-seq processed data files (i.e., anything not a fastq file). to be compiled by ARK BDM.","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality, cellRangerOutput, parentBiospecimenID, biospecimenType, biospecimenSubtype, dataType, dataSubtype, diagnosis, visitID, libraryPrepMethod, nucleicAcidSource, alignmentReference, softwareAndVersion, program, project, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","" -"snATACSeq Processed Data Annotation Template","A data contributor template outlining metadata to be collected as file annotations for snATAC-seq processed data files (i.e., anything not a fastq file).","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality, cellRangerOutput","","","","","","","" -"BDM snATACSeq Processed Data Annotations","A template outlining metadata to be collected as file annotations for snATAC-seq processed data files (i.e., anything not a fastq file). 
to be compiled by ARK BDM.","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality, cellRangerOutput, parentBiospecimenID, biospecimenType, biospecimenSubtype, dataType, dataSubtype, diagnosis, visitID, libraryPrepMethod, nucleicAcidSource, alignmentReference, softwareAndVersion, program, project, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","" -"scVDJSeq Processed Data Annotation Template","A data contributor template outlining metadata to be collected as file annotations for scVDJ-seq (i.e., immune repertoire profiling) processed data files (i.e., anything not a fastq file).","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality","","","","","","","" -"BDM scVDJSeq Processed Data Annotations","A template outlining metadata to be collected as file annotations for scVDJ-seq (i.e., immune repertoire profiling) processed data files (i.e., anything not a fastq file). to be compiled by ARK BDM.","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality, parentBiospecimenID, biospecimenType, biospecimenSubtype, dataType, dataSubtype, diagnosis, visitID, libraryPrepMethod, nucleicAcidSource, alignmentReference, softwareAndVersion, program, project, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","" -"AMP RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","" -"AMP AIM","Accelerating Medicines Partnership Autoimmune and Immune-Mediated Diseases. 
Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","" +"Attribute","Description","Valid Values","DependsOn","Required","Parent","Source","Validation Rules","columnType","Format","Minimum","Maximum","Pattern" +"single specimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","biospecimenID, individualID","","","","","","","","","" +"resourceType","","experimental data, metadata","","TRUE","","","list like error","string_list","","","","" +"fileFormat","Standard file format name or file extension","csv, tsv, txt, xlsx, xls, fam, bim, bed, bam, h5, mtx, bai, rds, tgz, zip, h5ad","","TRUE","","","","string","","","","" +"feature barcode sequencing","Is a valid value of `assay` that triggers conditional dependencies for additional attributes.","","targetPanel, targetPanelSynID, targetPanelSize","","","","","","","","","" +"rds","Is a valid value of 'fileFormat' that triggers conditional dependencies for additional attributes.","","RObjectClass","","","","","","","","","" +"experimental data","Is a valid value of 'resourceType' that triggers conditional dependencies for additional attributes.","","processedDataType","","","","","","","","","" +"metadata","Any file that contains curated data describing an experiment and experimental-derived data, including metadata about study subjects, biospecimens, protocols, assay reagents, marker panels, and ID mappings. 
Is a valid value of Resource Type that triggers conditional dependencies for additional attributes.","","metadataType","FALSE","","","","","","","","" +"metadataType","","single-cell metadata","","TRUE","","","","string","","","","" +"scRNASeq Processed Data Annotation Template","A data contributor template outlining metadata to be collected as file annotations for scRNA-seq processed data files (i.e., anything not a fastq file).","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality, cellRangerOutput","","","","","","","","","" +"BDM scRNASeq Processed Data Annotations","A template outlining metadata to be collected as file annotations for scRNA-seq processed data files (i.e., anything not a fastq file). to be compiled by ARK BDM.","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality, cellRangerOutput, parentBiospecimenID, biospecimenType, biospecimenSubtype, dataType, dataSubtype, diagnosis, visitID, libraryPrepMethod, nucleicAcidSource, alignmentReference, softwareAndVersion, program, project, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","","","" +"snRNASeq Processed Data Annotation Template","A data contributor template outlining metadata to be collected as file annotations for snRNA-seq processed data files (i.e., anything not a fastq file).","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality, cellRangerOutput","","","","","","","","","" +"BDM snRNASeq Processed Data Annotations","A template outlining metadata to be collected as file annotations for snRNA-seq processed data files (i.e., anything not a fastq file). 
to be compiled by ARK BDM.","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality, cellRangerOutput, parentBiospecimenID, biospecimenType, biospecimenSubtype, dataType, dataSubtype, diagnosis, visitID, libraryPrepMethod, nucleicAcidSource, alignmentReference, softwareAndVersion, program, project, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","","","" +"snATACSeq Processed Data Annotation Template","A data contributor template outlining metadata to be collected as file annotations for snATAC-seq processed data files (i.e., anything not a fastq file).","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality, cellRangerOutput","","","","","","","","","" +"BDM snATACSeq Processed Data Annotations","A template outlining metadata to be collected as file annotations for snATAC-seq processed data files (i.e., anything not a fastq file). to be compiled by ARK BDM.","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality, cellRangerOutput, parentBiospecimenID, biospecimenType, biospecimenSubtype, dataType, dataSubtype, diagnosis, visitID, libraryPrepMethod, nucleicAcidSource, alignmentReference, softwareAndVersion, program, project, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","","","" +"scVDJSeq Processed Data Annotation Template","A data contributor template outlining metadata to be collected as file annotations for scVDJ-seq (i.e., immune repertoire profiling) processed data files (i.e., anything not a fastq file).","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality","","","","","","","","","" +"BDM scVDJSeq Processed Data Annotations","A template outlining metadata to be collected as file annotations for scVDJ-seq (i.e., immune repertoire profiling) processed data files (i.e., anything not a fastq file). 
to be compiled by ARK BDM.","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality, parentBiospecimenID, biospecimenType, biospecimenSubtype, dataType, dataSubtype, diagnosis, visitID, libraryPrepMethod, nucleicAcidSource, alignmentReference, softwareAndVersion, program, project, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","","","" +"AMP RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","","","" +"AMP AIM","Accelerating Medicines Partnership Autoimmune and Immune-Mediated Diseases. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","","","" diff --git a/model_contexts/singlecell/ark.singlecell_model.csv b/model_contexts/singlecell/ark.singlecell_model.csv index 8ee3ad46..0c3509cc 100644 --- a/model_contexts/singlecell/ark.singlecell_model.csv +++ b/model_contexts/singlecell/ark.singlecell_model.csv @@ -1,124 +1,124 @@ -"Attribute","Description","Valid Values","DependsOn","Properties","Required","Parent","DependsOn Component","Source","Validation Rules","columnType" -"single specimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","biospecimenID, individualID","","","","","","","" -"resourceType","High-level classification of the file content","experimental data, metadata","","","TRUE","","","","list like error","string" -"fileFormat","Standard file format name or file extension","csv, tsv, txt, xlsx, xls, fam, bim, bed, bam, h5, mtx, bai, rds, tgz, zip, h5ad","","","TRUE","","","","","string" -"feature barcode sequencing","Is a valid value of `assay` that triggers conditional dependencies for additional attributes.","","targetPanel, targetPanelSynID, targetPanelSize","","","","","","","" -"Rds","Is 
a valid value of 'fileFormat' that triggers conditional dependencies for additional attributes.","","RObjectClass","","","","","","","" -"experimental data","Is a valid value of 'resourceType' that triggers conditional dependencies for additional attributes.","","processedDataType","","","","","","","" -"metadata","Any file that contains curated data describing an experiment and experimental-derived data, including metadata about study subjects, biospecimens, protocols, assay reagents, marker panels, and ID mappings. Is a valid value of Resource Type that triggers conditional dependencies for additional attributes.","","metadataType","","FALSE","","","","","" -"metadataType","A label further classifying the content of metadata resource.","single-cell metadata","","","TRUE","","","","","string" -"scRNASeq Processed Data Annotation Template","A data contributor template outlining metadata to be collected as file annotations for scRNA-seq processed data files (i.e., anything not a fastq file).","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality, cellRangerOutput","","","","","","","" -"BDM scRNASeq Processed Data Annotations","A template outlining metadata to be collected as file annotations for scRNA-seq processed data files (i.e., anything not a fastq file). 
to be compiled by ARK BDM.","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality, cellRangerOutput, parentBiospecimenID, biospecimenType, biospecimenSubtype, dataType, dataSubtype, diagnosis, visitID, libraryPrepMethod, nucleicAcidSource, alignmentReference, softwareAndVersion, program, project, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","" -"snRNASeq Processed Data Annotation Template","A data contributor template outlining metadata to be collected as file annotations for snRNA-seq processed data files (i.e., anything not a fastq file).","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality, cellRangerOutput","","","","","","","" -"BDM snRNASeq Processed Data Annotations","A template outlining metadata to be collected as file annotations for snRNA-seq processed data files (i.e., anything not a fastq file). to be compiled by ARK BDM.","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality, cellRangerOutput, parentBiospecimenID, biospecimenType, biospecimenSubtype, dataType, dataSubtype, diagnosis, visitID, libraryPrepMethod, nucleicAcidSource, alignmentReference, softwareAndVersion, program, project, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","" -"snATACSeq Processed Data Annotation Template","A data contributor template outlining metadata to be collected as file annotations for snATAC-seq processed data files (i.e., anything not a fastq file).","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality, cellRangerOutput","","","","","","","" -"BDM snATACSeq Processed Data Annotations","A template outlining metadata to be collected as file annotations for snATAC-seq processed data files (i.e., anything not a fastq file). 
to be compiled by ARK BDM.","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality, cellRangerOutput, parentBiospecimenID, biospecimenType, biospecimenSubtype, dataType, dataSubtype, diagnosis, visitID, libraryPrepMethod, nucleicAcidSource, alignmentReference, softwareAndVersion, program, project, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","" -"scVDJSeq Processed Data Annotation Template","A data contributor template outlining metadata to be collected as file annotations for scVDJ-seq (i.e., immune repertoire profiling) processed data files (i.e., anything not a fastq file).","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality","","","","","","","" -"BDM scVDJSeq Processed Data Annotations","A template outlining metadata to be collected as file annotations for scVDJ-seq (i.e., immune repertoire profiling) processed data files (i.e., anything not a fastq file). to be compiled by ARK BDM.","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality, parentBiospecimenID, biospecimenType, biospecimenSubtype, dataType, dataSubtype, diagnosis, visitID, libraryPrepMethod, nucleicAcidSource, alignmentReference, softwareAndVersion, program, project, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","" -"AMP RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","" -"AMP AIM","Accelerating Medicines Partnership Autoimmune and Immune-Mediated Diseases. 
Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","" -"Component","A high-level attribute for grouping attributes into templates.","","","","True","","","","","string" -"publicationSynID","The synID of the corresponding Synapse entity that stores metadata about the publication. This is used to differentiate publication-specific files, often consisting of level 4 processed data and expanded subject metadata, in a publication dataset that also includes raw or minimally processed files from experimental datasets. This provides an easy way to distinguish and select for the publication-specific data from which the research findings were derived. When this attribute is used to annotate a Dataset it serves as a way to directly link the Dataset entity with the publication metadata stored in Synapse.","","","","False","","","","regex search ^syn[0-9]{8} error","string" -"associatedDataset","The synID of a Dataset entity. This serves to link other Synapse entities to Dataset entities. When used to annotate a publication Dataset this attribute should include the synID for an experimental Datasets from which the publication data was derived. 
Multiple synID can be specified using a comma-delimited list.","","","","False","","","","list like::regex search ^syn[0-9]{8} error","string" -"program","Name of the funding program that supported the generation of data and associated files","AMP AIM, AMP RA/SLE, Community Contribution","","","True","","","","","string" -"project","A sub-level attribute of `program` specifying a research initiative working to investigate particular hypotheses.","AIM for RA, ELLIPSS, LOCKIT, RA, SLE, STAMP, UMass V-CoRT","","","True","","","","list like error","string" -"programPhase","A label noting which AMP RA/SLE program phase generated the data.","I, II","","","True","","","","list like error","string" -"dataType","High-level classification of the type of data contained in the file, loosely related to the experimental method or biological entity that is being profiled. Select all that apply using a comma-delimited list, though in most cases only a single label is expected. For multimodal datasets with concomitant profiling of biospecimen include 'multimodal'.","cytometry, epigenomics, genomics, histology, immune repertoire profiling, immunostaining, lipidomics, metabolomics, microbiome, multimodal, proteomics, transcriptomics","","","True","","","","list like error","string" -"dataSubtype","General classification to differentiate between omics profiling modalities. If N/A please select 'none'. Multiple selections can be provided in a comma-delimited list, however this is largely only expected in the context of Datasets and files that contain integrated experimental data spanning multiple types.","bulk, none, pseudobulk, single-cell, single-nucleus, spatial","","","True","","","","list like error","string" -"dataLevel","Level of data processing applied to file. Levels refer to pre-defined standards of processing for the given assay.","1, 2, 3, 4, 5","","","True","","","","","string" -"assay","The technology used to generate the data in this file. 
For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.","ASAPSeq, CE-MS, CITESeq, CosMX, CyTOF, GenePS SeqFISH, H&E, LC-MS/MS, NULISA, Olink Explore HT, Olink Flex, Olink Focus, Olink Reveal, Olink Target 48, Olink Target 96, RNASeq, SNP array, SomaScan, VDJSeq, Visium, WES, WGS, Xenium, feature barcode sequencing, flow cytometry, imaging mass cytometry, imaging mass spectrometry, kiloplex, multiplexed ELISA, scRNASeq, scVDJSeq, serial IHC, snATACSeq, snRNASeq","","","True","","","","list like error","string" -"10xProbeSetReference","Name of probe set used in 10x Chromium Flex, Xenium, or Visium data. If custom modified probe set was used the probe reference should be included as metadata accompanying the experimental data files.","Flex Human Transcriptome Probe Set v1.0.1, Flex Human Transcriptome Probe Set v1.1.0, Visium Human Transcriptome v1, Visium Human Transcriptome v2, custom probe set","","","True","","","","","string" -"custom10xProbeSetSynID","If custom modified probe sets were used for collecting 10x Chromium Flex scRNA-seq data, then the probe reference files should be grouped as a zip or tar archive and uploaded as metadata. This attribute links the experimental data back to the probe set archive file via a synapse ID.","","","","True","","","","regex search ^syn[0-9]{8} error","string" -"platform","The specific version (manufacturer, model, etc.) of a technology that is used to carry out a laboratory or computational experiment. Specify where applicable for experimental data files, else enter 'none'. 
In most cases only a single label is expected, however multiple selections can be provided in comma-delimited list where applicable e.g., for 10x Genomics fastq files please specify both the 10x instrument and the sequencing platform.","BD FACSAria Fusion cell sorter, BD FACSAria III, BD FACSCanto, BD FACSCanto II, BD FACSDiscover A8, BD FACSDiscover S8, BD FACSLyric Clinical, BD FACSMelody, BD FACSymphony S6, BD LSRFortessa, Chromium Controller, Chromium GEM-X Single Cell 3' Chip v4, Chromium Next GEM Chip G, Chromium Next GEM Chip H, Chromium Next GEM Chip K, Chromium Next GEM Chip M, Chromium Next GEM Chip Q, Chromium X, Chromium Xo, Chromium iX, CyTOF XT, Cytek Aurora, Cytek Aurora Evo, Fluidigm BioMark, GEM-X Flex Gene Expression Chip, GEM-X OCM 5' Chip, Helios Mass Cytometer, Hyperion, Illumina HiSeq 2500, Illumina HiSeq X Ten, Illumina NextSeq 500, Illumina NovaSeq 6000, Illumina NovaSeq X, Not Applicable, Olink Signature Q100, Sony MA900, Thermo Fisher Attune CytPix, Thermo Fisher Attune NxT, Thermo Fisher Attune Xenith, Visium CytAssist, Xenium, none, unknown","","","True","","","","list like error","string" -"specimenModality","Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens","multispecimen, single specimen, unknown","","","True","","","","","string" -"individualID","Unique identifier assigned to each study participant. For multi-specimen data files provide all IDs in a comma-separated list.","","","","True","","","","#ClinicalMetadataTemplate unique error^^#BiospecimenMetadataTemplate str^^list like error","string" -"biospecimenID","A unique identifier assigned to specimens collected from study participants. 
For multi-specimen data files provide all IDs in a comma-separated list.","","","","True","","","","#BiospecimenMetadataTemplate unique error^^list like error","string" -"parentBiospecimenID","The biospecimenID associated with the originating biospecimen for derived or child biospecimens.","","","","False","","","","","string" -"biospecimenType","A label indicating the biological material collected for experimentation and data collection. Where applicable, provide all types in a comma-separated list.","PBMCs, cell line, fibroblast-like synoviocyte, kidney biopsy, none, plasma, primary cell culture, saliva, salivary gland, serum, skin biopsy, skin swab, stool, suction blister cells, suction blister fluid, synovial fluid, synovial tissue, total leukocytes, urine, uvea, whole blood","","","True","","","","#BiospecimenMetadataTemplate str^^#InVitroBiospecimenMetadataTemplate^^list like error","string" -"primaryCellSource","A label indicating the biological source material from which a primary cell culture was derived.","PBMCs, kidney, pannus-derived dermis, pannus-derived epidermis, salivary gland, synovial tissue, total leukocytes, urine, uvea, whole blood","","","True","","","","","string" -"codingLanguage","The coding, aka programming, language(s) contained in the file marked with `resourceType = code`. 
Select all that apply.","C, C#, C++, Fortan, Java, Julia, Matlab, Python, R, Ruby, SAS","","","True","","","","","string" -"visitID","Ordinal ID distinguishing different patient visits.","","","","True","","","","","string" -"softwareAndVersion","Relevant software and version used to generate the data file.","BD FACSDiva 8.0.1, Cell Ranger 9.0.1, Cell Ranger ATAC v1.1.0, Cell Ranger v3.0.0, Cell Ranger v3.0.1, Cell Ranger v3.0.2, Cell Ranger v3.1.0, Cell Ranger v4.0.0, Cell Ranger v5.0.0, Cell Ranger v5.0.1, Cell Ranger v6.0.0, Cell Ranger v6.0.1, Cell Ranger v6.0.2, Cell Ranger v6.1.0, Cell Ranger v6.1.1, Cell Ranger v6.1.2, Cell Ranger v7.0.0, Cell Ranger v7.0.1, Cell Ranger v7.1.0, Cell Ranger v7.2.0, Cell Ranger v8.0.0, Cell Ranger v8.0.1, Cell Ranger v9.0.0, Space Ranger 3.0.0, Space Ranger 3.0.1, Space Ranger 3.1.0, Space Ranger 3.1.1, Space Ranger 3.1.2, Space Ranger 3.1.3, demuxlet","","","False","","","","","string" -"targetPanel","A unique or established human-readable name assigned to the panel of targets profiled in the experiment. For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.","","","","True","","","","","string" -"targetPanelSynID","In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. This attribute links experimental data files to the target panel metadata via the synapse ID of that file.","","","","True","","","","regex search ^syn[0-9]{8} error","string" -"targetPanelSize","The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. 
The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).","","","","False","","","","int error","integer" -"nucleicAcidSource","The source of the nucleic acid used as input for sequencing library fragments. Select all that apply, though in most cases only a single label is expected.","BCR mRNA, CRISPR protospacer feature barcode, TCR mRNA, Tn5-accessible gDNA, antigen capture barcode, gDNA, globin-depleted RNA, intracellular protein feature barcode, multiplexing oligo, poly(A) RNA, rRNA-depleted RNA, surface protein feature barcode","","","True","","","","list like error","string" -"biospecimenSubtype","Biospecimen status before sample is processed into a scRNA-seq library. Several scRNA-seq technologies support a variety of sample processing methods which can introduces sources of technical variation.","FFPE tissue, PFA-fixed tissue, cell or tissue lysate, cell suspension, flow-sorted cells, fresh tissue, frozen tissue, nuclei suspension, supernatant","","","False","","","","","string" -"cellRangerOutput","10x Genomics Cell Ranger software output several different counts results and formats, some with different processing applied. This label distinguishes between these types and is particularly helpful when multiple files are uploaded with the sample name, e.g., barcodes.tsv.gz","Not Applicable, filtered MEX, filtered_feature_bc_matrix, filtered_peak_bc_matrix, raw MEX, raw_feature_bc_matrix, raw_peak_bc_matrix","","","True","","","","","string" -"alignmentReference","The genomic/transcriptomic reference used for performing read alignment against.","10x Cell Ranger Human GRCh38 2020-A, 10x Cell Ranger Human GRCh38 2024-A, GRCh38, modified GRCh38, unknown, vdj_GRCh38_alts_ensembl-4.0.0","","","True","","","","","string" -"libraryPrepMethod","Sequencing library preparation method or kit used to create the library. 
If no commercially available kit was used, please select 'in-house library prep'.","10x Chromium Fixed RNA Human Transcriptome, 10x Chromium GEM-X Single Cell 3' v4, 10x Chromium GEM-X Single Cell 5' v3, 10x Chromium Next GEM Single Cell 3', 10x Chromium Next GEM Single Cell 3' 3.1, 10x Chromium Next GEM Single Cell 5' v1.1, 10x Chromium Next GEM Single Cell 5' v2, 10x Chromium Next GEM Single Cell ATAC v2, 10x Chromium Single Cell Human BCR, 10x Chromium Single Cell Human TCR, 10x GEM-X Flex Gene Expression Human, 10x GEM-X Universal 5' Gene Expression v3, CEL-Seq2, Chromium Next GEM Single Cell ATAC v1.1, Fluidigm C1 HT, NEBNext Human Immune Sequencing Kit, NEBNext Ultra II Directional RNA Library, Nextera XT, Nextera XT DNA, QIAseq miRNA Library, SMART-Seq Human BCR with UMI, SMART-Seq Human TCR with UMI, SMART-Seq v4 Ultra Low Input RNA, SMARTer Stranded Total RNA v2, Takara Human BCR profiling for Illumina, Takara Human TCR profiling for Illumina, Takara Human TCRv2 profiling for Illumina, Takara Human scTCR profiling for Illumina, TruSeq Stranded mRNA, custom DASH-treatment, in-house library prep","","","True","","","","","string" -"datasetType","High-level classification of dataset entity distinguishing between datasets compiled for a specific publication or as a general data resource.","experimental, publication","","","True","","","","","string" -"acknowledgmentStatement","A Dataset-specific attribute specifying the path to the wiki subpage within the ARK Portal - backend project that contains the acknowledgement statement that must be included in publications using data from the given dataset as a stipulation of the conditions of use.","syn26710600/wiki/619685","","","True","","","","","string" -"datasetDescription","A Dataset-specific attribute specifying the synID of the folder that contains a wiki write-up of the dataset description. 
This wiki content will be surfaced on the ARK Portal frontend site.","","","","True","","","","regex search ^syn[0-9]{8} error","string" -"ARKRelease","A Dataset-specific attribute specifying the ARK Portal release in which this dataset was first made available to the public.","1.0, 2.0, 2024.06.R1, 2024.07.R1, 2024.08.R1, 2024.09.R1, 2024.10.R1, 2024.12.R1, 2025.01.R1, 2025.02.R1, 2025.03.R1, 2025.04.R1, 2025.05.R1, 2025.06.R1, 2025.07.R1, 2025.08.R1, 2025.09.R1, 2025.10.R1, 2025.11.R1, 2025.12.R1","","","True","","","","","string" -"RObjectClass","Rds files store R objects, one per file. This label details the class of the R object saved to the Rds file or other similar file types.","ROCR prediction.object, Seurat object, SummarizedExperiment, Symphony reference, data.frame, list, matrix, sparse matrix, vector","","","False","","","","","string" -"diagnosis","A high-level classifier indicating the disease status of an individual.","At-Risk RA, Not Applicable, OA, RA, SLE, Sjogren's disease, control, cutaneous lupus erythematosus, dermatomyositis, discoid lupus erythematosus, lupus nephritis, psoriasis, psoriatic arthritis, scleroderma, unknown, vitiligo","","","True","","","","list like error","string" -"DOID","Disease ontology identifier associated with `diagnosis`. Attribute values are applied by ARK Portal data managers.","","","","True","","","","regex search ^DOID error","string" -"libraryID","A library label or name, unique within an experiment, used to distinguish sequencing libraries.","","","","True","","","","","string" -"associatedCodeURL","A URL to the repository where associated code is available.","","","","False","","","","","string" -"FMAID","Functional Model of Anatomy ontology ID corresponding to the anatomical site and origin of the biospecimen. 
This attribute is assigned by ARK Portal data managers.","","","","True","","","https://bioportal.bioontology.org/ontologies/FMA/?p=summary","int error","integer" -"BRENDA","BRENDA Tissue and Enzyme Source Ontology ID (BTO) corresponding to the `biospecimenType`. Attribute values are applied by ARK Portal data managers.","","","","True","","","https://bioportal.bioontology.org/ontologies/BTO","regex search ^BTO error","string" -"skinSiteStatus","Disease manifestation status of skin biospecimen.","healthy control, lesional, lesional proximal, non-lesional","","","True","","","","","string" -"salivaCollectionProcedure","Classification of saliva collection procedure. This is a conditionally dependent attribute triggered for saliva `biospecimenType`.","stimulated, unstimulated","","","True","","","","","string" -"synovialCollectionProcedure","Classification of procedure for synovial tissue collection.","arthroplasty, biopsy, synovectomy, unknown","","","True","","","","","string" -"publicationType","General classification of publication.","correction, peer-reviewed, pre-print","","","True","","","","","string" -"title","Title of the publication.","","","","True","","","","","string" -"journal","Journal in which the publication was released","","","","True","","","","","string" -"year","Year (YYYY) in which the paper was published.","","","","True","","","","regex search [1-2][0-9]{3} error","string" -"publicationDate","The publication date extracted from PubMed database","","","","True","","","","","string" -"PMID","PubMed(R) Identifier","","","","True","","","","regex search ^PMID error","string" -"DOI","Digital object identifier","","","","True","","","","","string" -"anatomicalSite","The anatomical site, i.e., location on or within the body, from which the biospecimen was collected. 
This attribute is required depending on the value selected for biospecimenType.","left 2nd MCP joint, left ankle joint, left hip joint, left knee joint, left wrist joint, other site, right 1st MTP joint, right 2nd MCP joint, right 2nd MTP joint, right 3rd MCP joint, right ankle joint, right hip joint, right knee joint, right wrist joint, unknown","","","True","","","","","string" -"vitiligoPattern","A high-level classification of vitiligo based on the distribution and patterning of lesions across the body.","mixed, non-segmental, segmental, unclassified","","","False","","","","","string" -"vitiligoPhenotype","Classification of vitiligo lesions which correlate with autoimmune activity and result in specific skin and depigmentation manifestations at the lesion site. If N/A please select 'none'. Multiple selections can be provided in a comma-delimited list.","active, confetti, inflammatory, none, trichrome","","","True","","","","list like error","string" -"psoriasisType","General type classification of psoriasis disease manifestation.","erythrodermic, guttate, inverse, plaque, pustular","","","False","","","","","string" -"ageDiagnosis","Age at which subject was diagnosed with `diagnosis`. If providing this value be sure the unit matches that used for `age`.","","","","False","","","","num error","number" -"age","Age at which subject was enrolled in study or age at corresponding visit and data collection event. 
If value unknown, enter '-1'.","","","","True","","","","num error","number" -"ageUnits","The unit of measure used for `ageEnrollment` and `ageDiagnosis`","months, years","","","True","","","","","string" -"sex","A textual description of a person's sex at birth.","female, intersex, male, unknown","","","True","","","","","string" -"height","Standing height of subject.","","","","True","","","","num error","number" -"race","A textual description of a person's race.","American Indian or Alaska Native, Asian, Black or African American, Hispanic, Mixed Race, Native Hawaiian or Other Pacific Islander, White, other, unknown","","","False","","","","","string" -"ethnicity","The ethnicity of a person.","Hispanic or Latino, Not Hispanic or Latino, unknown","","","False","","","","","string" -"heightUnits","Unit of measure of value provided for `height`.","centimeters, feet, inches, meters","","","True","","","","","string" -"weight","Weight of subject. If value unknown, enter '-1'.","","","","True","","","","num error","number" -"weightUnits","Abbreviated unit of measure of value provided for `weight`.","g, kg, lb, oz","","","True","","","","","string" -"comorbidities","Any diseases or medical condition that is simultaneously present in addition to `diagnosis`.","Hashimoto's Thyroiditis, autoimmune thyroid disease, cardiovascular disease, diabetes, inflammatory bowel disease, multiple sclerosis, other, psoriasis, psoriatic arthritis, pulmonary disease, rheumatoid arthritis, systemic lupus erythematosus","","","False","","","","list like error","string" -"diabetesType","Type of diabetes mellitus.","gestational, type 1, type 2, unknown","","","False","","","","","string" -"VIDA","Vitiligo Disease Activity Score, a six-point scale that evaluates the activity of vitiligo","","","","False","","","","","string" -"VASI","Total body Vitiligo Area Severity Index, a measure of the percentage of vitiligo involvement across the body calculated in terms of hand 
units.","","","","False","","","","","string" -"VETI","Vitiligo Extent Tensity Index measures the extent of vitiligo by a numerical score and combines analysis of extensity and severity of vitiligo to produce a constant and reproducible number.","","","","False","","","https://doi.org/10.5826/dpc.0404a18","num error","number" -"PASI","Psoriasis Area and Severity Index, is a measurement of the discoloration, thickness, scaling, and coverage of psoriasis plaques.","","","","False","","","","","string" -"CDASI","Cutaneous Dermatomyositis Disease Area and Severity Index, is a measurement used to characterize cutaneous dermatomyositis severity.","","","","False","","","","","string" -"percentCellViability","A measure of the proportion of viable cells within a cell suspension. Scale is 0-100.","","","","True","","","","inRange 50 100 error","integer" -"inputCellCount","An estimate of the number of cells expected to be sequenced in a library. Software that process single-cell sequencing data often include options for users to specify this value to improve processing results.","","","","True","","","","int error","integer" -"totalReads","Total number of reads sequenced from the library.","","","","True","","","","int error","integer" -"sequencingSaturation","A measure of the fraction of library complexity that was sequenced in a library. This metric quantifies the fraction of reads originating from an already-observed UMI. More specifically, this is the fraction of confidently mapped, valid cell-barcode, valid UMI reads that are non-unique. 
Scale is 0-1.","","","","False","","","https://kb.10xgenomics.com/hc/en-us/articles/115005062366-What-is-sequencing-saturation","inRange 0 1 error","number" -"ImmPortAccession","Accession to corresponding information in ImmPort.","","","","False","","","","regex search ^SDY error","string" -"FACSPopulation","A description of the marker gating strategy used to derive the population cells with FACS.","","","","True","","","","","string" -"cellOntologyID","Cell Ontology CL identifier that best describes a biopsecimen used to generate data, e.g., CL:0000084 for T cell.","","","","False","","","","regex search ^CL: error","string" -"cellType","The cell type name from Cell Ontology for the corresponding CL identifier.","","","","False","","","","","string" -"userDefinedCellType","User-defined label of a cell type. Contributors are provided this option to use preferred or custom cell type labels that may vary from options and standards set by Cell Ontology.","","","","False","","","","","string" -"krennInflammatory","A standardized, semi-quantitative measure of the degree of inflammatory infiltrate in synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"krennLining","A standardized, semi-quantitative measure of the degree of hyperplasia/enlargement of the synovial lining layer of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. 
If value unknown, use '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"krennStroma","A standardized, semi-quantitative measure of stromal cell density of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"krennSynovitisScore","The Krenn Synovitis Score (KSS) is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"datasetStatus","A categorical label indicating the status of an ARK Portal dataset. 
This is applied to improve downstream management of datasets as well as various ETL workflows.","deprecated, released, test, under peer review, unreleased","","","True","","","","","string" -"sampleProcessingBatch","A label indicating batching of sample processing or preparation that occurs prior to data collection.","","","","False","","","","","string" -"dataCollectionBatch","A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.","","","","False","","","","","string" -"metadataStandards","Metadata standards used to generate the metadata","ARK data model, user-defined","","","False","","","","","string" -"skinSunExposure","For skin-based biospecimen, this attribute indicates whether the sample was collected from an anatomical site that does or does not receive routine sun exposure.","not sun exposed, sun exposed","","","False","","","","","string" -"sequencingSpikeIn","Pre-made sequencing libraries may be added to your sequencing run to improve sequencing results. If such a 'spike-in' library was used, this attribute specifies the name of the library and the percentage of the spike-in library used in the sequencing run.","","","","False","","","","","string" -"librarySpikeIn","Pre-made collections of nucleic acid fragments can be added to samples during the library construction process to improve downstream quantification and statistical analyses. If such a 'spike-in' was used, this attribute specifies the name and manufacturer of the spike-in.","","","","False","","","","","string" -"readLength","The number of base pairs (bp) sequenced for reads in a fastq file.","","","","False","","","","int error","integer" -"plateID","An identifier assigned to a multi-well plate. Certain data types and assays profile samples using multi-well plates. 
Knowing which samples were profiled on each plate is important for establishing sample provenance, finding the right data files for a specific set of samples, as well as downstream exploratory data analysis and QC work particularly regarding identification and correction of batch effects.","","","","False","","","","","string" -"PMCID","Pubmed Central Identifier, formatted as a compact URI string that will automatically resolve into a URL based on the Synapse bioregistry; e.g., pmc:PMCxxxxxxxx","","","","False","","","","regex search ^pmc:PMC[0-9]{8} error","string" -"processedDataType","A label used for file annotations to provide a brief description of the processed data file.","barcode counts, differential expression results, epigenomic peaks, gene counts","","","False","","","","list like error","string" -"eventCount","The total number of events detected and captured in the corresponding FCS file. In cytometry, an event corresponds to particles detected and measured by the instrument. This number corresponds to the number of rows in the FCS file and is expected to be a whole integer number.","","","","False","","","","int error","integer" -"notes","Please use this attribute to captured pertinent details not otherwise captured. This is an unstructured text entry, please be concise.","","","","False","","","","","string" -"species","The genus species of sample or subject origin.","Homo sapiens","","","True","","","","","string" -"treatment","A short descriptive label indicating what experimental treatments have been applied to a biospecimen before data capture. Please include the word 'control' to indicate specific treatments that are intended to serve as a control group.","","","","True","","","","","string" -"treatmentTimepoint","Where applicable, specify the timepoint relative to treatment to distinguish different sets of samples that have undergone the same treatment but for different lengths of time. 
REQUIRED: please specify the unit of time, e.g., secs, mins, hrs, days, etc.","","","","False","","","","","string" -"slideID","A distinct label or name, unique within an experiment, assigned to an imaging slides.","","","","True","","","","","string" -"altSampleID","An alternate identifier for a sample. With some assays there can be default or alternate sample identifiers entered into the data collection software. If you will be uploading data collected with an alternate identifier please provide that here. In some cases, the ARK BDM team will use this field to capture original but out-dated identifiers for samples.","","","","False","","","","","string" -"sampleCollectionBatch","A label indicating batching of sample collection or experiment execution that occurs prior to data collection.","","","","False","","","","","string" -"associatedAccession","This is a File and Dataset annotation attribute indicating additional accessions (i.e., unique identifiers) associated with the data when the data has also been submitted to or can be found in other repositories such as GEO, SRA, dbGaP, etc.","","","","False","","","","list like error","string" +"Attribute","Description","Valid Values","DependsOn","Required","Parent","Source","Validation Rules","columnType","Format","Minimum","Maximum","Pattern" +"single specimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","biospecimenID, individualID","","","","","","","","","" +"resourceType","High-level classification of the file content","experimental data, metadata","","TRUE","","","list like error","string_list","","","","" +"fileFormat","Standard file format name or file extension","csv, tsv, txt, xlsx, xls, fam, bim, bed, bam, h5, mtx, bai, rds, tgz, zip, h5ad","","TRUE","","","","string","","","","" +"feature barcode sequencing","Is a valid value of `assay` that triggers conditional dependencies for additional attributes.","","targetPanel, targetPanelSynID, 
targetPanelSize","","","","","","","","","" +"rds","Is a valid value of 'fileFormat' that triggers conditional dependencies for additional attributes.","","RObjectClass","","","","","","","","","" +"experimental data","Is a valid value of 'resourceType' that triggers conditional dependencies for additional attributes.","","processedDataType","","","","","","","","","" +"metadata","Any file that contains curated data describing an experiment and experimental-derived data, including metadata about study subjects, biospecimens, protocols, assay reagents, marker panels, and ID mappings. Is a valid value of 'resourceType' that triggers conditional dependencies for additional attributes.","","metadataType","FALSE","","","","","","","","" +"metadataType","A label further classifying the content of a metadata resource.","single-cell metadata","","TRUE","","","","string","","","","" +"scRNASeq Processed Data Annotation Template","A data contributor template outlining metadata to be collected as file annotations for scRNA-seq processed data files (i.e., anything not a fastq file).","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality, cellRangerOutput","","","","","","","","","" +"BDM scRNASeq Processed Data Annotations","A template outlining metadata to be collected as file annotations for scRNA-seq processed data files (i.e., anything not a fastq file),
to be compiled by ARK BDM.","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality, cellRangerOutput, parentBiospecimenID, biospecimenType, biospecimenSubtype, dataType, dataSubtype, diagnosis, visitID, libraryPrepMethod, nucleicAcidSource, alignmentReference, softwareAndVersion, program, project, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","","","" +"snRNASeq Processed Data Annotation Template","A data contributor template outlining metadata to be collected as file annotations for snRNA-seq processed data files (i.e., anything not a fastq file).","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality, cellRangerOutput","","","","","","","","","" +"BDM snRNASeq Processed Data Annotations","A template outlining metadata to be collected as file annotations for snRNA-seq processed data files (i.e., anything not a fastq file), to be compiled by ARK BDM.","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality, cellRangerOutput, parentBiospecimenID, biospecimenType, biospecimenSubtype, dataType, dataSubtype, diagnosis, visitID, libraryPrepMethod, nucleicAcidSource, alignmentReference, softwareAndVersion, program, project, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","","","" +"snATACSeq Processed Data Annotation Template","A data contributor template outlining metadata to be collected as file annotations for snATAC-seq processed data files (i.e., anything not a fastq file).","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality, cellRangerOutput","","","","","","","","","" +"BDM snATACSeq Processed Data Annotations","A template outlining metadata to be collected as file annotations for snATAC-seq processed data files (i.e., anything not a fastq file),
to be compiled by ARK BDM.","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality, cellRangerOutput, parentBiospecimenID, biospecimenType, biospecimenSubtype, dataType, dataSubtype, diagnosis, visitID, libraryPrepMethod, nucleicAcidSource, alignmentReference, softwareAndVersion, program, project, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","","","" +"scVDJSeq Processed Data Annotation Template","A data contributor template outlining metadata to be collected as file annotations for scVDJ-seq (i.e., immune repertoire profiling) processed data files (i.e., anything not a fastq file).","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality","","","","","","","","","" +"BDM scVDJSeq Processed Data Annotations","A template outlining metadata to be collected as file annotations for scVDJ-seq (i.e., immune repertoire profiling) processed data files (i.e., anything not a fastq file). to be compiled by ARK BDM.","","Component, fileFormat, assay, resourceType, dataLevel, specimenModality, parentBiospecimenID, biospecimenType, biospecimenSubtype, dataType, dataSubtype, diagnosis, visitID, libraryPrepMethod, nucleicAcidSource, alignmentReference, softwareAndVersion, program, project, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","","","" +"AMP RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","","","" +"AMP AIM","Accelerating Medicines Partnership Autoimmune and Immune-Mediated Diseases. 
Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","","","","","","","","" +"Component","A high-level attribute for grouping attributes into templates.","","","True","","","","string","","","","" +"program","Name of the funding program that supported the generation of data and associated files","AMP AIM, AMP RA/SLE, Community Contribution","","True","","","","string","","","","" +"project","A sub-level attribute of `program` specifying a research initiative working to investigate particular hypotheses.","AIM for RA, ELLIPSS, LOCKIT, RA, SLE, STAMP, UMass V-CoRT","","True","","","list like error","string_list","","","","" +"programPhase","A label noting which AMP RA/SLE program phase generated the data.","I, II","","True","","","list like error","string_list","","","","" +"dataType","High-level classification of the type of data contained in the file, loosely related to the experimental method or biological entity that is being profiled. Select all that apply using a comma-delimited list, though in most cases only a single label is expected. For multimodal datasets with concomitant profiling of biospecimen include 'multimodal'.","cytometry, epigenomics, genomics, histology, immune repertoire profiling, immunostaining, lipidomics, metabolomics, microbiome, multimodal, proteomics, transcriptomics","","True","","","list like error","string_list","","","","" +"dataSubtype","General classification to differentiate between omics profiling modalities. If N/A please select 'none'. Multiple selections can be provided in a comma-delimited list, however this is largely only expected in the context of Datasets and files that contain integrated experimental data spanning multiple types.","bulk, none, pseudobulk, single-cell, single-nucleus, spatial","","True","","","list like error","string_list","","","","" +"dataLevel","Level of data processing applied to file. 
Levels refer to pre-defined standards of processing for the given assay.","1, 2, 3, 4, 5","","True","","","","string","","","","" +"assay","The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.","ASAPSeq, CE-MS, CITESeq, CosMX, CyTOF, GenePS SeqFISH, H&E, LC-MS/MS, NULISA, Olink Explore HT, Olink Flex, Olink Focus, Olink Reveal, Olink Target 48, Olink Target 96, RNASeq, SNP array, SomaScan, VDJSeq, Visium, WES, WGS, Xenium, feature barcode sequencing, flow cytometry, imaging mass cytometry, imaging mass spectrometry, kiloplex, multiplexed ELISA, scRNASeq, scVDJSeq, serial IHC, snATACSeq, snRNASeq","","True","","","list like error","string_list","","","","" +"10xProbeSetReference","Name of probe set used in 10x Chromium Flex, Xenium, or Visium data. If custom modified probe set was used the probe reference should be included as metadata accompanying the experimental data files.","Flex Human Transcriptome Probe Set v1.0.1, Flex Human Transcriptome Probe Set v1.1.0, Visium Human Transcriptome v1, Visium Human Transcriptome v2, custom probe set","","True","","","","string","","","","" +"platform","The specific version (manufacturer, model, etc.) of a technology that is used to carry out a laboratory or computational experiment. Specify where applicable for experimental data files, else enter 'none'. 
In most cases only a single label is expected, however multiple selections can be provided in comma-delimited list where applicable e.g., for 10x Genomics fastq files please specify both the 10x instrument and the sequencing platform.","BD FACSAria Fusion cell sorter, BD FACSAria III, BD FACSCanto, BD FACSCanto II, BD FACSDiscover A8, BD FACSDiscover S8, BD FACSLyric Clinical, BD FACSMelody, BD FACSymphony S6, BD LSRFortessa, Chromium Controller, Chromium GEM-X Single Cell 3' Chip v4, Chromium Next GEM Chip G, Chromium Next GEM Chip H, Chromium Next GEM Chip K, Chromium Next GEM Chip M, Chromium Next GEM Chip Q, Chromium X, Chromium Xo, Chromium iX, CyTOF XT, Cytek Aurora, Cytek Aurora Evo, Fluidigm BioMark, GEM-X Flex Gene Expression Chip, GEM-X OCM 5' Chip, Helios Mass Cytometer, Hyperion, Illumina HiSeq 2500, Illumina HiSeq X Ten, Illumina NextSeq 500, Illumina NovaSeq 6000, Illumina NovaSeq X, Not Applicable, Olink Signature Q100, Sony MA900, Thermo Fisher Attune CytPix, Thermo Fisher Attune NxT, Thermo Fisher Attune Xenith, Visium CytAssist, Xenium, none, unknown","","True","","","list like error","string_list","","","","" +"specimenModality","Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens","multispecimen, single specimen, unknown","","True","","","","string","","","","" +"individualID","Unique identifier assigned to each study participant. For multi-specimen data files provide all IDs in a comma-separated list.","","","True","","","#ClinicalMetadataTemplate unique error^^#BiospecimenMetadataTemplate str^^list like error","string_list","","","","" +"biospecimenID","A unique identifier assigned to specimens collected from study participants. 
For multi-specimen data files provide all IDs in a comma-separated list.","","","True","","","#BiospecimenMetadataTemplate unique error^^list like error","string_list","","","","" +"parentBiospecimenID","The biospecimenID associated with the originating biospecimen for derived or child biospecimens.","","","False","","","","string","","","","" +"biospecimenType","A label indicating the biological material collected for experimentation and data collection. Where applicable, provide all types in a comma-separated list.","PBMCs, cell line, fibroblast-like synoviocyte, kidney biopsy, none, plasma, primary cell culture, saliva, salivary gland, serum, skin biopsy, skin swab, stool, suction blister cells, suction blister fluid, synovial fluid, synovial tissue, total leukocytes, urine, uvea, whole blood","","True","","","#BiospecimenMetadataTemplate str^^#InVitroBiospecimenMetadataTemplate^^list like error","string_list","","","","" +"primaryCellSource","A label indicating the biological source material from which a primary cell culture was derived.","PBMCs, kidney, pannus-derived dermis, pannus-derived epidermis, salivary gland, synovial tissue, total leukocytes, urine, uvea, whole blood","","True","","","","string","","","","" +"codingLanguage","The coding, aka programming, language(s) contained in the file marked with `resourceType = code`. 
Select all that apply.","C, C#, C++, Fortan, Java, Julia, Matlab, Python, R, Ruby, SAS","","True","","","","string","","","","" +"visitID","Ordinal ID distinguishing different patient visits.","","","True","","","","string","","","","" +"softwareAndVersion","Relevant software and version used to generate the data file.","BD FACSDiva 8.0.1, Cell Ranger 9.0.1, Cell Ranger ATAC v1.1.0, Cell Ranger v3.0.0, Cell Ranger v3.0.1, Cell Ranger v3.0.2, Cell Ranger v3.1.0, Cell Ranger v4.0.0, Cell Ranger v5.0.0, Cell Ranger v5.0.1, Cell Ranger v6.0.0, Cell Ranger v6.0.1, Cell Ranger v6.0.2, Cell Ranger v6.1.0, Cell Ranger v6.1.1, Cell Ranger v6.1.2, Cell Ranger v7.0.0, Cell Ranger v7.0.1, Cell Ranger v7.1.0, Cell Ranger v7.2.0, Cell Ranger v8.0.0, Cell Ranger v8.0.1, Cell Ranger v9.0.0, Space Ranger 3.0.0, Space Ranger 3.0.1, Space Ranger 3.1.0, Space Ranger 3.1.1, Space Ranger 3.1.2, Space Ranger 3.1.3, demuxlet","","False","","","","string","","","","" +"targetPanel","A unique or established human-readable name assigned to the panel of targets profiled in the experiment. For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.","","","True","","","","string","","","","" +"targetPanelSize","The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).","","","False","","","int error","integer","","","","" +"nucleicAcidSource","The source of the nucleic acid used as input for sequencing library fragments. 
Select all that apply, though in most cases only a single label is expected.","BCR mRNA, CRISPR protospacer feature barcode, TCR mRNA, Tn5-accessible gDNA, antigen capture barcode, gDNA, globin-depleted RNA, intracellular protein feature barcode, multiplexing oligo, poly(A) RNA, rRNA-depleted RNA, surface protein feature barcode","","True","","","list like error","string_list","","","","" +"biospecimenSubtype","Biospecimen status before sample is processed into a scRNA-seq library. Several scRNA-seq technologies support a variety of sample processing methods which can introduce sources of technical variation.","FFPE tissue, PFA-fixed tissue, cell or tissue lysate, cell suspension, flow-sorted cells, fresh tissue, frozen tissue, nuclei suspension, supernatant","","False","","","","string","","","","" +"cellRangerOutput","10x Genomics Cell Ranger software outputs several different counts results and formats, some with different processing applied. This label distinguishes between these types and is particularly helpful when multiple files are uploaded with the sample name, e.g., barcodes.tsv.gz","Not Applicable, filtered MEX, filtered_feature_bc_matrix, filtered_peak_bc_matrix, raw MEX, raw_feature_bc_matrix, raw_peak_bc_matrix","","True","","","","string","","","","" +"alignmentReference","The genomic/transcriptomic reference used for performing read alignment against.","10x Cell Ranger Human GRCh38 2020-A, 10x Cell Ranger Human GRCh38 2024-A, GRCh38, modified GRCh38, unknown, vdj_GRCh38_alts_ensembl-4.0.0","","True","","","","string","","","","" +"libraryPrepMethod","Sequencing library preparation method or kit used to create the library.
If no commercially available kit was used, please select 'in-house library prep'.","10x Chromium Fixed RNA Human Transcriptome, 10x Chromium GEM-X Single Cell 3' v4, 10x Chromium GEM-X Single Cell 5' v3, 10x Chromium Next GEM Single Cell 3', 10x Chromium Next GEM Single Cell 3' 3.1, 10x Chromium Next GEM Single Cell 5' v1.1, 10x Chromium Next GEM Single Cell 5' v2, 10x Chromium Next GEM Single Cell ATAC v2, 10x Chromium Single Cell Human BCR, 10x Chromium Single Cell Human TCR, 10x GEM-X Flex Gene Expression Human, 10x GEM-X Universal 5' Gene Expression v3, CEL-Seq2, Chromium Next GEM Single Cell ATAC v1.1, Fluidigm C1 HT, NEBNext Human Immune Sequencing Kit, NEBNext Ultra II Directional RNA Library, Nextera XT, Nextera XT DNA, QIAseq miRNA Library, SMART-Seq Human BCR with UMI, SMART-Seq Human TCR with UMI, SMART-Seq v4 Ultra Low Input RNA, SMARTer Stranded Total RNA v2, Takara Human BCR profiling for Illumina, Takara Human TCR profiling for Illumina, Takara Human TCRv2 profiling for Illumina, Takara Human scTCR profiling for Illumina, TruSeq Stranded mRNA, custom DASH-treatment, in-house library prep","","True","","","","string","","","","" +"datasetType","High-level classification of dataset entity distinguishing between datasets compiled for a specific publication or as a general data resource.","experimental, publication","","True","","","","string","","","","" +"acknowledgmentStatement","A Dataset-specific attribute specifying the path to the wiki subpage within the ARK Portal - backend project that contains the acknowledgement statement that must be included in publications using data from the given dataset as a stipulation of the conditions of use.","syn26710600/wiki/619685","","True","","","","string","","","","" +"ARKRelease","A Dataset-specific attribute specifying the ARK Portal release in which this dataset was first made available to the public.","1.0, 2.0, 2024.06.R1, 2024.07.R1, 2024.08.R1, 2024.09.R1, 2024.10.R1, 2024.12.R1, 2025.01.R1, 2025.02.R1, 
2025.03.R1, 2025.04.R1, 2025.05.R1, 2025.06.R1, 2025.07.R1, 2025.08.R1, 2025.09.R1, 2025.10.R1, 2025.11.R1, 2025.12.R1","","True","","","","string","","","","" +"RObjectClass","Rds files store R objects, one per file. This label details the class of the R object saved to the Rds file or other similar file types.","ROCR prediction.object, Seurat object, SummarizedExperiment, Symphony reference, data.frame, list, matrix, sparse matrix, vector","","False","","","","string","","","","" +"diagnosis","A high-level classifier indicating the disease status of an individual.","At-Risk RA, Not Applicable, OA, RA, SLE, Sjogren's disease, control, cutaneous lupus erythematosus, dermatomyositis, discoid lupus erythematosus, lupus nephritis, psoriasis, psoriatic arthritis, scleroderma, unknown, vitiligo","","True","","","list like error","string_list","","","","" +"libraryID","A library label or name, unique within an experiment, used to distinguish sequencing libraries.","","","True","","","","string","","","","" +"associatedCodeURL","A URL to the repository where associated code is available.","","","False","","","","string","uri","","","" +"FMAID","Functional Model of Anatomy ontology ID corresponding to the anatomical site and origin of the biospecimen. This attribute is assigned by ARK Portal data managers.","","","True","","https://bioportal.bioontology.org/ontologies/FMA/?p=summary","int error","integer","","","","" +"skinSiteStatus","Disease manifestation status of skin biospecimen.","healthy control, lesional, lesional proximal, non-lesional","","True","","","","string","","","","" +"salivaCollectionProcedure","Classification of saliva collection procedure. 
This is a conditionally dependent attribute triggered for saliva `biospecimenType`.","stimulated, unstimulated","","True","","","","string","","","","" +"synovialCollectionProcedure","Classification of procedure for synovial tissue collection.","arthroplasty, biopsy, synovectomy, unknown","","True","","","","string","","","","" +"publicationType","General classification of publication.","correction, peer-reviewed, pre-print","","True","","","","string","","","","" +"title","Title of the publication.","","","True","","","","string","","","","" +"journal","Journal in which the publication was released","","","True","","","","string","","","","" +"publicationDate","The publication date extracted from PubMed database","","","True","","","","string","","","","" +"DOI","Digital object identifier","","","True","","","","string","uri-reference","","","" +"anatomicalSite","The anatomical site, i.e., location on or within the body, from which the biospecimen was collected. This attribute is required depending on the value selected for biospecimenType.","left 2nd MCP joint, left ankle joint, left hip joint, left knee joint, left wrist joint, other site, right 1st MTP joint, right 2nd MCP joint, right 2nd MTP joint, right 3rd MCP joint, right ankle joint, right hip joint, right knee joint, right wrist joint, unknown","","True","","","","string","","","","" +"vitiligoPattern","A high-level classification of vitiligo based on the distribution and patterning of lesions across the body.","mixed, non-segmental, segmental, unclassified","","False","","","","string","","","","" +"vitiligoPhenotype","Classification of vitiligo lesions which correlate with autoimmune activity and result in specific skin and depigmentation manifestations at the lesion site. If N/A please select 'none'. 
Multiple selections can be provided in a comma-delimited list.","active, confetti, inflammatory, none, trichrome","","True","","","list like error","string_list","","","","" +"psoriasisType","General type classification of psoriasis disease manifestation.","erythrodermic, guttate, inverse, plaque, pustular","","False","","","","string","","","","" +"ageDiagnosis","Age at which subject was diagnosed with `diagnosis`. If providing this value be sure the unit matches that used for `age`.","","","False","","","num error","number","","","","" +"age","Age at which subject was enrolled in study or age at corresponding visit and data collection event. If value unknown, enter '-1'.","","","True","","","num error","number","","","","" +"ageUnits","The unit of measure used for `ageEnrollment` and `ageDiagnosis`","months, years","","True","","","","string","","","","" +"sex","A textual description of a person's sex at birth.","female, intersex, male, unknown","","True","","","","string","","","","" +"height","Standing height of subject.","","","True","","","num error","number","","","","" +"race","A textual description of a person's race.","American Indian or Alaska Native, Asian, Black or African American, Hispanic, Mixed Race, Native Hawaiian or Other Pacific Islander, White, other, unknown","","False","","","","string","","","","" +"ethnicity","The ethnicity of a person.","Hispanic or Latino, Not Hispanic or Latino, unknown","","False","","","","string","","","","" +"heightUnits","Unit of measure of value provided for `height`.","centimeters, feet, inches, meters","","True","","","","string","","","","" +"weight","Weight of subject. 
If value unknown, enter '-1'.","","","True","","","num error","number","","","","" +"weightUnits","Abbreviated unit of measure of value provided for `weight`.","g, kg, lb, oz","","True","","","","string","","","","" +"comorbidities","Any diseases or medical condition that is simultaneously present in addition to `diagnosis`.","Hashimoto's Thyroiditis, autoimmune thyroid disease, cardiovascular disease, diabetes, inflammatory bowel disease, multiple sclerosis, other, psoriasis, psoriatic arthritis, pulmonary disease, rheumatoid arthritis, systemic lupus erythematosus","","False","","","list like error","string_list","","","","" +"diabetesType","Type of diabetes mellitus.","gestational, type 1, type 2, unknown","","False","","","","string","","","","" +"VIDA","Vitiligo Disease Activity Score, a six-point scale that evaluates the activity of vitiligo","","","False","","","","string","","","","" +"VASI","Total body Vitiligo Area Severity Index, a measure of the percentage of vitiligo involvement across the body calculated in terms of hand units.","","","False","","","","string","","","","" +"VETI","Vitiligo Extent Tensity Index measures the extent of vitiligo by a numerical score and combines analysis of extensity and severity of vitiligo to produce a constant and reproducible number.","","","False","","https://doi.org/10.5826/dpc.0404a18","num error","number","","","","" +"PASI","Psoriasis Area and Severity Index, is a measurement of the discoloration, thickness, scaling, and coverage of psoriasis plaques.","","","False","","","","string","","","","" +"CDASI","Cutaneous Dermatomyositis Disease Area and Severity Index, is a measurement used to characterize cutaneous dermatomyositis severity.","","","False","","","","string","","","","" +"inputCellCount","An estimate of the number of cells expected to be sequenced in a library. 
Software that processes single-cell sequencing data often includes options for users to specify this value to improve processing results.","","","True","","","int error","integer","","","","" +"totalReads","Total number of reads sequenced from the library.","","","True","","","int error","integer","","","","" +"FACSPopulation","A description of the marker gating strategy used to derive the population of cells with FACS.","","","True","","","","string","","","","" +"cellType","The cell type name from Cell Ontology for the corresponding CL identifier.","","","False","","","","string","","","","" +"userDefinedCellType","User-defined label of a cell type. Contributors are provided this option to use preferred or custom cell type labels that may vary from options and standards set by Cell Ontology.","","","False","","","","string","","","","" +"krennInflammatory","A standardized, semi-quantitative measure of the degree of inflammatory infiltrate in synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"krennLining","A standardized, semi-quantitative measure of the degree of hyperplasia/enlargement of the synovial lining layer of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"krennStroma","A standardized, semi-quantitative measure of stromal cell density of synovial specimen.
This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"krennSynovitisScore","The Krenn Synovitis Score (KSS) is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"datasetStatus","A categorical label indicating the status of an ARK Portal dataset. This is applied to improve downstream management of datasets as well as various ETL workflows.","deprecated, released, test, under peer review, unreleased","","True","","","","string","","","","" +"sampleProcessingBatch","A label indicating batching of sample processing or preparation that occurs prior to data collection.","","","False","","","","string","","","","" +"dataCollectionBatch","A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.","","","False","","","","string","","","","" +"metadataStandards","Metadata standards used to generate the metadata","ARK data model, user-defined","","False","","","","string","","","","" +"skinSunExposure","For skin-based biospecimen, this attribute indicates whether the sample was collected from an anatomical site that does or does not receive routine sun exposure.","not sun exposed, sun exposed","","False","","","","string","","","","" +"sequencingSpikeIn","Pre-made sequencing libraries may be added to your sequencing run to improve sequencing results. 
If such a 'spike-in' library was used, this attribute specifies the name of the library and the percentage of the spike-in library used in the sequencing run.","","","False","","","","string","","","","" +"librarySpikeIn","Pre-made collections of nucleic acid fragments can be added to samples during the library construction process to improve downstream quantification and statistical analyses. If such a 'spike-in' was used, this attribute specifies the name and manufacturer of the spike-in.","","","False","","","","string","","","","" +"readLength","The number of base pairs (bp) sequenced for reads in a fastq file.","","","False","","","int error","integer","","","","" +"plateID","An identifier assigned to a multi-well plate. Certain data types and assays profile samples using multi-well plates. Knowing which samples were profiled on each plate is important for establishing sample provenance, finding the right data files for a specific set of samples, as well as downstream exploratory data analysis and QC work particularly regarding identification and correction of batch effects.","","","False","","","","string","","","","" +"processedDataType","A label used for file annotations to provide a brief description of the processed data file.","barcode counts, differential expression results, epigenomic peaks, gene counts","","False","","","list like error","string_list","","","","" +"eventCount","The total number of events detected and captured in the corresponding FCS file. In cytometry, an event corresponds to particles detected and measured by the instrument. This number corresponds to the number of rows in the FCS file and is expected to be a whole integer number.","","","False","","","int error","integer","","","","" +"notes","Please use this attribute to capture pertinent details not otherwise captured.
This is an unstructured text entry, please be concise.","","","False","","","","string","","","","" +"species","The genus species of sample or subject origin.","Homo sapiens","","True","","","","string","","","","" +"treatment","A short descriptive label indicating what experimental treatments have been applied to a biospecimen before data capture. Please include the word 'control' to indicate specific treatments that are intended to serve as a control group.","","","True","","","","string","","","","" +"treatmentTimepoint","Where applicable, specify the timepoint relative to treatment to distinguish different sets of samples that have undergone the same treatment but for different lengths of time. REQUIRED: please specify the unit of time, e.g., secs, mins, hrs, days, etc.","","","False","","","","string","","","","" +"slideID","A distinct label or name, unique within an experiment, assigned to an imaging slide.","","","True","","","","string","","","","" +"altSampleID","An alternate identifier for a sample. With some assays there can be default or alternate sample identifiers entered into the data collection software. If you will be uploading data collected with an alternate identifier please provide that here. In some cases, the ARK BDM team will use this field to capture original but out-dated identifiers for samples.","","","False","","","","string","","","","" +"sampleCollectionBatch","A label indicating batching of sample collection or experiment execution that occurs prior to data collection.","","","False","","","","string","","","","" +"associatedAccession","This is a File and Dataset annotation attribute indicating additional accessions (i.e., unique identifiers) associated with the data when the data has also been submitted to or can be found in other repositories such as GEO, SRA, dbGaP, etc.","","","False","","","list like error","string_list","","","","" +"percentCellViability","A measure of the proportion of viable cells within a cell suspension.
Scale is 0-100.","","","True","","","inRange 50 100 error","integer","","50","100","" +"sequencingSaturation","A measure of the fraction of library complexity that was sequenced in a library. This metric quantifies the fraction of reads originating from an already-observed UMI. More specifically, this is the fraction of confidently mapped, valid cell-barcode, valid UMI reads that are non-unique. Scale is 0-1.","","","False","","https://kb.10xgenomics.com/hc/en-us/articles/115005062366-What-is-sequencing-saturation","inRange 0 1 error","number","","0","1","" +"publicationSynID","The synID of the corresponding Synapse entity that stores metadata about the publication. This is used to differentiate publication-specific files, often consisting of level 4 processed data and expanded subject metadata, in a publication dataset that also includes raw or minimally processed files from experimental datasets. This provides an easy way to distinguish and select for the publication-specific data from which the research findings were derived. When this attribute is used to annotate a Dataset it serves as a way to directly link the Dataset entity with the publication metadata stored in Synapse.","","","False","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"associatedDataset","The synID of a Dataset entity. This serves to link other Synapse entities to Dataset entities. When used to annotate a publication Dataset this attribute should include the synID for an experimental Datasets from which the publication data was derived. Multiple synID can be specified using a comma-delimited list.","","","False","","","list like::regex search ^syn[0-9]{8} error","string_list","","","","^syn[0-9]{8}" +"custom10xProbeSetSynID","If custom modified probe sets were used for collecting 10x Chromium Flex scRNA-seq data, then the probe reference files should be grouped as a zip or tar archive and uploaded as metadata. 
This attribute links the experimental data back to the probe set archive file via a synapse ID.","","","True","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"targetPanelSynID","In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. This attribute links experimental data files to the target panel metadata via the synapse ID of that file.","","","True","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"datasetDescription","A Dataset-specific attribute specifying the synID of the folder that contains a wiki write-up of the dataset description. This wiki content will be surfaced on the ARK Portal frontend site.","","","True","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"DOID","Disease ontology identifier associated with `diagnosis`. Attribute values are applied by ARK Portal data managers.","","","True","","","regex search ^DOID error","string","","","","^DOID" +"BRENDA","BRENDA Tissue and Enzyme Source Ontology ID (BTO) corresponding to the `biospecimenType`. 
Attribute values are applied by ARK Portal data managers.","","","True","","https://bioportal.bioontology.org/ontologies/BTO","regex search ^BTO error","string","","","","^BTO" +"year","Year (YYYY) in which the paper was published.","","","True","","","regex search [1-2][0-9]{3} error","string","","","","[1-2][0-9]{3}" +"PMID","PubMed(R) Identifier","","","True","","","regex search ^PMID error","string","","","","^PMID" +"ImmPortAccession","Accession to corresponding information in ImmPort.","","","False","","","regex search ^SDY error","string","","","","^SDY" +"cellOntologyID","Cell Ontology CL identifier that best describes a biospecimen used to generate data, e.g., CL:0000084 for T cell.","","","False","","","regex search ^CL: error","string","","","","^CL:" +"PMCID","Pubmed Central Identifier, formatted as a compact URI string that will automatically resolve into a URL based on the Synapse bioregistry; e.g., pmc:PMCxxxxxxxx","","","False","","","regex search ^pmc:PMC[0-9]{8} error","string","","","","^pmc:PMC[0-9]{8}" diff --git a/model_contexts/spatial/ark.spatial_context.csv b/model_contexts/spatial/ark.spatial_context.csv index 37a9c547..570f47ba 100644 --- a/model_contexts/spatial/ark.spatial_context.csv +++ b/model_contexts/spatial/ark.spatial_context.csv @@ -1,14 +1,14 @@ -"Attribute","Description","Valid Values","DependsOn","Properties","Required","Parent","DependsOn Component","Source","Validation Rules","columnType" -"Spatial Imaging Assay Metadata Template","A template outlining metadata to be collected for each slide in a spatial transcriptomic or imaging based dataset.","","Component, assay, specimenModality, sampleProcessingBatch, dataCollectionBatch, targetPanel, targetPanelSynID, targetPanelSize","","","","","","","" -"Spatial Imaging File Annotation Template","A data contributor template outlining metadata to be collected as file annotations for imaging-based data files.","","Component, fileFormat, assay, resourceType,
specimenModality","","","","","","","" -"BDM Spatial Imaging File Annotations","A template outlining as file annotations for imaging-based data files to be compiled by ARK BDM.","","Component, fileFormat, assay, resourceType, specimenModality, parentBiospecimenID, biospecimenType, biospecimenSubtype, dataType, dataSubtype, diagnosis, visitID, targetPanel, targetPanelSynID, targetPanelSize, program, project, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","" -"single specimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","biospecimenID","","","","","","","" -"multispecimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","slideID","","","","","","","" -"resourceType","","experimental data, metadata","","","TRUE","","","","list like error","string" -"assay","","Xenium, CosMX, GenePS SeqFISH, imaging mass cytometry, Visium, H&E, serial IHC","","","TRUE","","","","","string" -"Rds","Is a valid value of 'fileFormat' that triggers conditional dependencies for additional attributes.","","RObjectClass","","FALSE","","","","","" -"experimental data","Is a valid value of 'resourceType' that triggers conditional dependencies for additional attributes.","","processedDataType","","FALSE","","","","","" -"metadata","Is a valid value of Resource Type that triggers conditional dependencies for additional attributes. Any file that contains curated data describing an experiment and experimental-derived data, including metadata about study subjects, biospecimens, protocols, assay reagents, marker panels, and ID mappings","","metadataType","","FALSE","","","","","" -"metadataType","","single-cell metadata, cell coordinates, target panel, tissue microarray map, other","","","TRUE","","","","","string" -"AMP RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. 
Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","FALSE","","","","","" -"AMP AIM","Accelerating Medicines Partnership Autoimmune and Immune-Mediated Diseases. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","FALSE","","","","","" +"Attribute","Description","Valid Values","DependsOn","Required","Parent","Source","Validation Rules","columnType","Format","Minimum","Maximum","Pattern" +"Spatial Imaging Assay Metadata Template","A template outlining metadata to be collected for each slide in a spatial transcriptomic or imaging based dataset.","","Component, assay, specimenModality, sampleProcessingBatch, dataCollectionBatch, targetPanel, targetPanelSynID, targetPanelSize","","","","","","","","","" +"Spatial Imaging File Annotation Template","A data contributor template outlining metadata to be collected as file annotations for imaging-based data files.","","Component, fileFormat, assay, resourceType, specimenModality","","","","","","","","","" +"BDM Spatial Imaging File Annotations","A template outlining as file annotations for imaging-based data files to be compiled by ARK BDM.","","Component, fileFormat, assay, resourceType, specimenModality, parentBiospecimenID, biospecimenType, biospecimenSubtype, dataType, dataSubtype, diagnosis, visitID, targetPanel, targetPanelSynID, targetPanelSize, program, project, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","","","" +"single specimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","biospecimenID","","","","","","","","","" +"multispecimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","slideID","","","","","","","","","" +"resourceType","","experimental data, metadata","","TRUE","","","list like error","string_list","","","","" 
+"assay","","Xenium, CosMX, GenePS SeqFISH, imaging mass cytometry, Visium, H&E, serial IHC","","TRUE","","","","string","","","","" +"rds","Is a valid value of 'fileFormat' that triggers conditional dependencies for additional attributes.","","RObjectClass","FALSE","","","","","","","","" +"experimental data","Is a valid value of 'resourceType' that triggers conditional dependencies for additional attributes.","","processedDataType","FALSE","","","","","","","","" +"metadata","Is a valid value of Resource Type that triggers conditional dependencies for additional attributes. Any file that contains curated data describing an experiment and experimental-derived data, including metadata about study subjects, biospecimens, protocols, assay reagents, marker panels, and ID mappings","","metadataType","FALSE","","","","","","","","" +"metadataType","","single-cell metadata, cell coordinates, target panel, tissue microarray map, other","","TRUE","","","","string","","","","" +"AMP RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","FALSE","","","","","","","","" +"AMP AIM","Accelerating Medicines Partnership Autoimmune and Immune-Mediated Diseases. 
Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","FALSE","","","","","","","","" diff --git a/model_contexts/spatial/ark.spatial_model.csv b/model_contexts/spatial/ark.spatial_model.csv index 6391dfaa..7470aa0d 100644 --- a/model_contexts/spatial/ark.spatial_model.csv +++ b/model_contexts/spatial/ark.spatial_model.csv @@ -1,119 +1,119 @@ -"Attribute","Description","Valid Values","DependsOn","Properties","Required","Parent","DependsOn Component","Source","Validation Rules","columnType" -"Spatial Imaging Assay Metadata Template","A template outlining metadata to be collected for each slide in a spatial transcriptomic or imaging based dataset.","","Component, assay, specimenModality, sampleProcessingBatch, dataCollectionBatch, targetPanel, targetPanelSynID, targetPanelSize","","","","","","","" -"Spatial Imaging File Annotation Template","A data contributor template outlining metadata to be collected as file annotations for imaging-based data files.","","Component, fileFormat, assay, resourceType, specimenModality","","","","","","","" -"BDM Spatial Imaging File Annotations","A template outlining as file annotations for imaging-based data files to be compiled by ARK BDM.","","Component, fileFormat, assay, resourceType, specimenModality, parentBiospecimenID, biospecimenType, biospecimenSubtype, dataType, dataSubtype, diagnosis, visitID, targetPanel, targetPanelSynID, targetPanelSize, program, project, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","" -"single specimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","biospecimenID","","","","","","","" -"multispecimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","slideID","","","","","","","" -"resourceType","High-level classification of the file content","experimental data, 
metadata","","","TRUE","","","","list like error","string" -"assay","The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.","Xenium, CosMX, GenePS SeqFISH, imaging mass cytometry, Visium, H&E, serial IHC","","","TRUE","","","","","string" -"Rds","Is a valid value of 'fileFormat' that triggers conditional dependencies for additional attributes.","","RObjectClass","","FALSE","","","","","" -"experimental data","Is a valid value of 'resourceType' that triggers conditional dependencies for additional attributes.","","processedDataType","","FALSE","","","","","" -"metadata","Is a valid value of Resource Type that triggers conditional dependencies for additional attributes. Any file that contains curated data describing an experiment and experimental-derived data, including metadata about study subjects, biospecimens, protocols, assay reagents, marker panels, and ID mappings","","metadataType","","FALSE","","","","","" -"metadataType","A label further classifying the content of metadata resource.","single-cell metadata, cell coordinates, target panel, tissue microarray map, other","","","TRUE","","","","","string" -"AMP RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","FALSE","","","","","" -"AMP AIM","Accelerating Medicines Partnership Autoimmune and Immune-Mediated Diseases. 
Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","","FALSE","","","","","" -"Component","A high-level attribute for grouping attributes into templates.","","","","True","","","","","string" -"fileFormat","Standard file format name or file extension","bai, bam, bed, bim, csv, czi, docx, dose, erate, fam, fastq, fcs, geojson, h5, h5ad, info, mcd, mtx, parquet, pdf, py, rds, rec, svs, tbi, tgz, tsv, txt, vcf, xls, xlsx, zip","","","True","","","","","string" -"publicationSynID","The synID of the corresponding Synapse entity that stores metadata about the publication. This is used to differentiate publication-specific files, often consisting of level 4 processed data and expanded subject metadata, in a publication dataset that also includes raw or minimally processed files from experimental datasets. This provides an easy way to distinguish and select for the publication-specific data from which the research findings were derived. When this attribute is used to annotate a Dataset it serves as a way to directly link the Dataset entity with the publication metadata stored in Synapse.","","","","False","","","","regex search ^syn[0-9]{8} error","string" -"associatedDataset","The synID of a Dataset entity. This serves to link other Synapse entities to Dataset entities. When used to annotate a publication Dataset this attribute should include the synID for an experimental Datasets from which the publication data was derived. 
Multiple synID can be specified using a comma-delimited list.","","","","False","","","","list like::regex search ^syn[0-9]{8} error","string" -"program","Name of the funding program that supported the generation of data and associated files","AMP AIM, AMP RA/SLE, Community Contribution","","","True","","","","","string" -"project","A sub-level attribute of `program` specifying a research initiative working to investigate particular hypotheses.","AIM for RA, ELLIPSS, LOCKIT, RA, SLE, STAMP, UMass V-CoRT","","","True","","","","list like error","string" -"programPhase","A label noting which AMP RA/SLE program phase generated the data.","I, II","","","True","","","","list like error","string" -"dataType","High-level classification of the type of data contained in the file, loosely related to the experimental method or biological entity that is being profiled. Select all that apply using a comma-delimited list, though in most cases only a single label is expected. For multimodal datasets with concomitant profiling of biospecimen include 'multimodal'.","cytometry, epigenomics, genomics, histology, immune repertoire profiling, immunostaining, lipidomics, metabolomics, microbiome, multimodal, proteomics, transcriptomics","","","True","","","","list like error","string" -"dataSubtype","General classification to differentiate between omics profiling modalities. If N/A please select 'none'. Multiple selections can be provided in a comma-delimited list, however this is largely only expected in the context of Datasets and files that contain integrated experimental data spanning multiple types.","bulk, none, pseudobulk, single-cell, single-nucleus, spatial","","","True","","","","list like error","string" -"dataLevel","Level of data processing applied to file. Levels refer to pre-defined standards of processing for the given assay.","1, 2, 3, 4, 5","","","True","","","","","string" -"10xProbeSetReference","Name of probe set used in 10x Chromium Flex, Xenium, or Visium data. 
If custom modified probe set was used the probe reference should be included as metadata accompanying the experimental data files.","Flex Human Transcriptome Probe Set v1.0.1, Flex Human Transcriptome Probe Set v1.1.0, Visium Human Transcriptome v1, Visium Human Transcriptome v2, custom probe set","","","True","","","","","string" -"custom10xProbeSetSynID","If custom modified probe sets were used for collecting 10x Chromium Flex scRNA-seq data, then the probe reference files should be grouped as a zip or tar archive and uploaded as metadata. This attribute links the experimental data back to the probe set archive file via a synapse ID.","","","","True","","","","regex search ^syn[0-9]{8} error","string" -"platform","The specific version (manufacturer, model, etc.) of a technology that is used to carry out a laboratory or computational experiment. Specify where applicable for experimental data files, else enter 'none'. In most cases only a single label is expected, however multiple selections can be provided in comma-delimited list where applicable e.g., for 10x Genomics fastq files please specify both the 10x instrument and the sequencing platform.","BD FACSAria Fusion cell sorter, BD FACSAria III, BD FACSCanto, BD FACSCanto II, BD FACSDiscover A8, BD FACSDiscover S8, BD FACSLyric Clinical, BD FACSMelody, BD FACSymphony S6, BD LSRFortessa, Chromium Controller, Chromium GEM-X Single Cell 3' Chip v4, Chromium Next GEM Chip G, Chromium Next GEM Chip H, Chromium Next GEM Chip K, Chromium Next GEM Chip M, Chromium Next GEM Chip Q, Chromium X, Chromium Xo, Chromium iX, CyTOF XT, Cytek Aurora, Cytek Aurora Evo, Fluidigm BioMark, GEM-X Flex Gene Expression Chip, GEM-X OCM 5' Chip, Helios Mass Cytometer, Hyperion, Illumina HiSeq 2500, Illumina HiSeq X Ten, Illumina NextSeq 500, Illumina NovaSeq 6000, Illumina NovaSeq X, Not Applicable, Olink Signature Q100, Sony MA900, Thermo Fisher Attune CytPix, Thermo Fisher Attune NxT, Thermo Fisher Attune Xenith, Visium CytAssist, 
Xenium, none, unknown","","","True","","","","list like error","string" -"specimenModality","Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens","multispecimen, single specimen, unknown","","","True","","","","","string" -"individualID","Unique identifier assigned to each study participant. For multi-specimen data files provide all IDs in a comma-separated list.","","","","True","","","","#ClinicalMetadataTemplate unique error^^#BiospecimenMetadataTemplate str^^list like error","string" -"biospecimenID","A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.","","","","True","","","","#BiospecimenMetadataTemplate unique error^^list like error","string" -"parentBiospecimenID","The biospecimenID associated with the originating biospecimen for derived or child biospecimens.","","","","False","","","","","string" -"biospecimenType","A label indicating the biological material collected for experimentation and data collection. Where applicable, provide all types in a comma-separated list.","PBMCs, cell line, fibroblast-like synoviocyte, kidney biopsy, none, plasma, primary cell culture, saliva, salivary gland, serum, skin biopsy, skin swab, stool, suction blister cells, suction blister fluid, synovial fluid, synovial tissue, total leukocytes, urine, uvea, whole blood","","","True","","","","#BiospecimenMetadataTemplate str^^#InVitroBiospecimenMetadataTemplate^^list like error","string" -"primaryCellSource","A label indicating the biological source material from which a primary cell culture was derived.","PBMCs, kidney, pannus-derived dermis, pannus-derived epidermis, salivary gland, synovial tissue, total leukocytes, urine, uvea, whole blood","","","True","","","","","string" -"codingLanguage","The coding, aka programming, language(s) contained in the file marked with `resourceType = code`. 
Select all that apply.","C, C#, C++, Fortan, Java, Julia, Matlab, Python, R, Ruby, SAS","","","True","","","","","string" -"visitID","Ordinal ID distinguishing different patient visits.","","","","True","","","","","string" -"softwareAndVersion","Relevant software and version used to generate the data file.","BD FACSDiva 8.0.1, Cell Ranger 9.0.1, Cell Ranger ATAC v1.1.0, Cell Ranger v3.0.0, Cell Ranger v3.0.1, Cell Ranger v3.0.2, Cell Ranger v3.1.0, Cell Ranger v4.0.0, Cell Ranger v5.0.0, Cell Ranger v5.0.1, Cell Ranger v6.0.0, Cell Ranger v6.0.1, Cell Ranger v6.0.2, Cell Ranger v6.1.0, Cell Ranger v6.1.1, Cell Ranger v6.1.2, Cell Ranger v7.0.0, Cell Ranger v7.0.1, Cell Ranger v7.1.0, Cell Ranger v7.2.0, Cell Ranger v8.0.0, Cell Ranger v8.0.1, Cell Ranger v9.0.0, Space Ranger 3.0.0, Space Ranger 3.0.1, Space Ranger 3.1.0, Space Ranger 3.1.1, Space Ranger 3.1.2, Space Ranger 3.1.3, demuxlet","","","False","","","","","string" -"targetPanel","A unique or established human-readable name assigned to the panel of targets profiled in the experiment. For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.","","","","True","","","","","string" -"targetPanelSynID","In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. This attribute links experimental data files to the target panel metadata via the synapse ID of that file.","","","","True","","","","regex search ^syn[0-9]{8} error","string" -"targetPanelSize","The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. 
The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).","","","","False","","","","int error","integer" -"nucleicAcidSource","The source of the nucleic acid used as input for sequencing library fragments. Select all that apply, though in most cases only a single label is expected.","BCR mRNA, CRISPR protospacer feature barcode, TCR mRNA, Tn5-accessible gDNA, antigen capture barcode, gDNA, globin-depleted RNA, intracellular protein feature barcode, multiplexing oligo, poly(A) RNA, rRNA-depleted RNA, surface protein feature barcode","","","True","","","","list like error","string" -"biospecimenSubtype","Biospecimen status before sample is processed into a scRNA-seq library. Several scRNA-seq technologies support a variety of sample processing methods which can introduces sources of technical variation.","FFPE tissue, PFA-fixed tissue, cell or tissue lysate, cell suspension, flow-sorted cells, fresh tissue, frozen tissue, nuclei suspension, supernatant","","","False","","","","","string" -"cellRangerOutput","10x Genomics Cell Ranger software output several different counts results and formats, some with different processing applied. This label distinguishes between these types and is particularly helpful when multiple files are uploaded with the sample name, e.g., barcodes.tsv.gz","Not Applicable, filtered MEX, filtered_feature_bc_matrix, filtered_peak_bc_matrix, raw MEX, raw_feature_bc_matrix, raw_peak_bc_matrix","","","True","","","","","string" -"alignmentReference","The genomic/transcriptomic reference used for performing read alignment against.","10x Cell Ranger Human GRCh38 2020-A, 10x Cell Ranger Human GRCh38 2024-A, GRCh38, modified GRCh38, unknown, vdj_GRCh38_alts_ensembl-4.0.0","","","True","","","","","string" -"libraryPrepMethod","Sequencing library preparation method or kit used to create the library. 
If no commercially available kit was used, please select 'in-house library prep'.","10x Chromium Fixed RNA Human Transcriptome, 10x Chromium GEM-X Single Cell 3' v4, 10x Chromium GEM-X Single Cell 5' v3, 10x Chromium Next GEM Single Cell 3', 10x Chromium Next GEM Single Cell 3' 3.1, 10x Chromium Next GEM Single Cell 5' v1.1, 10x Chromium Next GEM Single Cell 5' v2, 10x Chromium Next GEM Single Cell ATAC v2, 10x Chromium Single Cell Human BCR, 10x Chromium Single Cell Human TCR, 10x GEM-X Flex Gene Expression Human, 10x GEM-X Universal 5' Gene Expression v3, CEL-Seq2, Chromium Next GEM Single Cell ATAC v1.1, Fluidigm C1 HT, NEBNext Human Immune Sequencing Kit, NEBNext Ultra II Directional RNA Library, Nextera XT, Nextera XT DNA, QIAseq miRNA Library, SMART-Seq Human BCR with UMI, SMART-Seq Human TCR with UMI, SMART-Seq v4 Ultra Low Input RNA, SMARTer Stranded Total RNA v2, Takara Human BCR profiling for Illumina, Takara Human TCR profiling for Illumina, Takara Human TCRv2 profiling for Illumina, Takara Human scTCR profiling for Illumina, TruSeq Stranded mRNA, custom DASH-treatment, in-house library prep","","","True","","","","","string" -"datasetType","High-level classification of dataset entity distinguishing between datasets compiled for a specific publication or as a general data resource.","experimental, publication","","","True","","","","","string" -"acknowledgmentStatement","A Dataset-specific attribute specifying the path to the wiki subpage within the ARK Portal - backend project that contains the acknowledgement statement that must be included in publications using data from the given dataset as a stipulation of the conditions of use.","syn26710600/wiki/619685","","","True","","","","","string" -"datasetDescription","A Dataset-specific attribute specifying the synID of the folder that contains a wiki write-up of the dataset description. 
This wiki content will be surfaced on the ARK Portal frontend site.","","","","True","","","","regex search ^syn[0-9]{8} error","string" -"ARKRelease","A Dataset-specific attribute specifying the ARK Portal release in which this dataset was first made available to the public.","1.0, 2.0, 2024.06.R1, 2024.07.R1, 2024.08.R1, 2024.09.R1, 2024.10.R1, 2024.12.R1, 2025.01.R1, 2025.02.R1, 2025.03.R1, 2025.04.R1, 2025.05.R1, 2025.06.R1, 2025.07.R1, 2025.08.R1, 2025.09.R1, 2025.10.R1, 2025.11.R1, 2025.12.R1","","","True","","","","","string" -"RObjectClass","Rds files store R objects, one per file. This label details the class of the R object saved to the Rds file or other similar file types.","ROCR prediction.object, Seurat object, SummarizedExperiment, Symphony reference, data.frame, list, matrix, sparse matrix, vector","","","False","","","","","string" -"diagnosis","A high-level classifier indicating the disease status of an individual.","At-Risk RA, Not Applicable, OA, RA, SLE, Sjogren's disease, control, cutaneous lupus erythematosus, dermatomyositis, discoid lupus erythematosus, lupus nephritis, psoriasis, psoriatic arthritis, scleroderma, unknown, vitiligo","","","True","","","","list like error","string" -"DOID","Disease ontology identifier associated with `diagnosis`. Attribute values are applied by ARK Portal data managers.","","","","True","","","","regex search ^DOID error","string" -"libraryID","A library label or name, unique within an experiment, used to distinguish sequencing libraries.","","","","True","","","","","string" -"associatedCodeURL","A URL to the repository where associated code is available.","","","","False","","","","","string" -"FMAID","Functional Model of Anatomy ontology ID corresponding to the anatomical site and origin of the biospecimen. 
This attribute is assigned by ARK Portal data managers.","","","","True","","","https://bioportal.bioontology.org/ontologies/FMA/?p=summary","int error","integer" -"BRENDA","BRENDA Tissue and Enzyme Source Ontology ID (BTO) corresponding to the `biospecimenType`. Attribute values are applied by ARK Portal data managers.","","","","True","","","https://bioportal.bioontology.org/ontologies/BTO","regex search ^BTO error","string" -"skinSiteStatus","Disease manifestation status of skin biospecimen.","healthy control, lesional, lesional proximal, non-lesional","","","True","","","","","string" -"salivaCollectionProcedure","Classification of saliva collection procedure. This is a conditionally dependent attribute triggered for saliva `biospecimenType`.","stimulated, unstimulated","","","True","","","","","string" -"synovialCollectionProcedure","Classification of procedure for synovial tissue collection.","arthroplasty, biopsy, synovectomy, unknown","","","True","","","","","string" -"publicationType","General classification of publication.","correction, peer-reviewed, pre-print","","","True","","","","","string" -"title","Title of the publication.","","","","True","","","","","string" -"journal","Journal in which the publication was released","","","","True","","","","","string" -"year","Year (YYYY) in which the paper was published.","","","","True","","","","regex search [1-2][0-9]{3} error","string" -"publicationDate","The publication date extracted from PubMed database","","","","True","","","","","string" -"PMID","PubMed(R) Identifier","","","","True","","","","regex search ^PMID error","string" -"DOI","Digital object identifier","","","","True","","","","","string" -"anatomicalSite","The anatomical site, i.e., location on or within the body, from which the biospecimen was collected. 
This attribute is required depending on the value selected for biospecimenType.","left 2nd MCP joint, left ankle joint, left hip joint, left knee joint, left wrist joint, other site, right 1st MTP joint, right 2nd MCP joint, right 2nd MTP joint, right 3rd MCP joint, right ankle joint, right hip joint, right knee joint, right wrist joint, unknown","","","True","","","","","string" -"vitiligoPattern","A high-level classification of vitiligo based on the distribution and patterning of lesions across the body.","mixed, non-segmental, segmental, unclassified","","","False","","","","","string" -"vitiligoPhenotype","Classification of vitiligo lesions which correlate with autoimmune activity and result in specific skin and depigmentation manifestations at the lesion site. If N/A please select 'none'. Multiple selections can be provided in a comma-delimited list.","active, confetti, inflammatory, none, trichrome","","","True","","","","list like error","string" -"psoriasisType","General type classification of psoriasis disease manifestation.","erythrodermic, guttate, inverse, plaque, pustular","","","False","","","","","string" -"ageDiagnosis","Age at which subject was diagnosed with `diagnosis`. If providing this value be sure the unit matches that used for `age`.","","","","False","","","","num error","number" -"age","Age at which subject was enrolled in study or age at corresponding visit and data collection event. 
If value unknown, enter '-1'.","","","","True","","","","num error","number" -"ageUnits","The unit of measure used for `ageEnrollment` and `ageDiagnosis`","months, years","","","True","","","","","string" -"sex","A textual description of a person's sex at birth.","female, intersex, male, unknown","","","True","","","","","string" -"height","Standing height of subject.","","","","True","","","","num error","number" -"race","A textual description of a person's race.","American Indian or Alaska Native, Asian, Black or African American, Hispanic, Mixed Race, Native Hawaiian or Other Pacific Islander, White, other, unknown","","","False","","","","","string" -"ethnicity","The ethnicity of a person.","Hispanic or Latino, Not Hispanic or Latino, unknown","","","False","","","","","string" -"heightUnits","Unit of measure of value provided for `height`.","centimeters, feet, inches, meters","","","True","","","","","string" -"weight","Weight of subject. If value unknown, enter '-1'.","","","","True","","","","num error","number" -"weightUnits","Abbreviated unit of measure of value provided for `weight`.","g, kg, lb, oz","","","True","","","","","string" -"comorbidities","Any diseases or medical condition that is simultaneously present in addition to `diagnosis`.","Hashimoto's Thyroiditis, autoimmune thyroid disease, cardiovascular disease, diabetes, inflammatory bowel disease, multiple sclerosis, other, psoriasis, psoriatic arthritis, pulmonary disease, rheumatoid arthritis, systemic lupus erythematosus","","","False","","","","list like error","string" -"diabetesType","Type of diabetes mellitus.","gestational, type 1, type 2, unknown","","","False","","","","","string" -"VIDA","Vitiligo Disease Activity Score, a six-point scale that evaluates the activity of vitiligo","","","","False","","","","","string" -"VASI","Total body Vitiligo Area Severity Index, a measure of the percentage of vitiligo involvement across the body calculated in terms of hand 
units.","","","","False","","","","","string" -"VETI","Vitiligo Extent Tensity Index measures the extent of vitiligo by a numerical score and combines analysis of extensity and severity of vitiligo to produce a constant and reproducible number.","","","","False","","","https://doi.org/10.5826/dpc.0404a18","num error","number" -"PASI","Psoriasis Area and Severity Index, is a measurement of the discoloration, thickness, scaling, and coverage of psoriasis plaques.","","","","False","","","","","string" -"CDASI","Cutaneous Dermatomyositis Disease Area and Severity Index, is a measurement used to characterize cutaneous dermatomyositis severity.","","","","False","","","","","string" -"percentCellViability","A measure of the proportion of viable cells within a cell suspension. Scale is 0-100.","","","","True","","","","inRange 50 100 error","integer" -"inputCellCount","An estimate of the number of cells expected to be sequenced in a library. Software that process single-cell sequencing data often include options for users to specify this value to improve processing results.","","","","True","","","","int error","integer" -"totalReads","Total number of reads sequenced from the library.","","","","True","","","","int error","integer" -"sequencingSaturation","A measure of the fraction of library complexity that was sequenced in a library. This metric quantifies the fraction of reads originating from an already-observed UMI. More specifically, this is the fraction of confidently mapped, valid cell-barcode, valid UMI reads that are non-unique. 
Scale is 0-1.","","","","False","","","https://kb.10xgenomics.com/hc/en-us/articles/115005062366-What-is-sequencing-saturation","inRange 0 1 error","number" -"ImmPortAccession","Accession to corresponding information in ImmPort.","","","","False","","","","regex search ^SDY error","string" -"FACSPopulation","A description of the marker gating strategy used to derive the population cells with FACS.","","","","True","","","","","string" -"cellOntologyID","Cell Ontology CL identifier that best describes a biopsecimen used to generate data, e.g., CL:0000084 for T cell.","","","","False","","","","regex search ^CL: error","string" -"cellType","The cell type name from Cell Ontology for the corresponding CL identifier.","","","","False","","","","","string" -"userDefinedCellType","User-defined label of a cell type. Contributors are provided this option to use preferred or custom cell type labels that may vary from options and standards set by Cell Ontology.","","","","False","","","","","string" -"krennInflammatory","A standardized, semi-quantitative measure of the degree of inflammatory infiltrate in synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"krennLining","A standardized, semi-quantitative measure of the degree of hyperplasia/enlargement of the synovial lining layer of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. 
If value unknown, use '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"krennStroma","A standardized, semi-quantitative measure of stromal cell density of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"krennSynovitisScore","The Krenn Synovitis Score (KSS) is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.","","","","False","","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number" -"datasetStatus","A categorical label indicating the status of an ARK Portal dataset. 
This is applied to improve downstream management of datasets as well as various ETL workflows.","deprecated, released, test, under peer review, unreleased","","","True","","","","","string" -"sampleProcessingBatch","A label indicating batching of sample processing or preparation that occurs prior to data collection.","","","","False","","","","","string" -"dataCollectionBatch","A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.","","","","False","","","","","string" -"metadataStandards","Metadata standards used to generate the metadata","ARK data model, user-defined","","","False","","","","","string" -"skinSunExposure","For skin-based biospecimen, this attribute indicates whether the sample was collected from an anatomical site that does or does not receive routine sun exposure.","not sun exposed, sun exposed","","","False","","","","","string" -"sequencingSpikeIn","Pre-made sequencing libraries may be added to your sequencing run to improve sequencing results. If such a 'spike-in' library was used, this attribute specifies the name of the library and the percentage of the spike-in library used in the sequencing run.","","","","False","","","","","string" -"librarySpikeIn","Pre-made collections of nucleic acid fragments can be added to samples during the library construction process to improve downstream quantification and statistical analyses. If such a 'spike-in' was used, this attribute specifies the name and manufacturer of the spike-in.","","","","False","","","","","string" -"readLength","The number of base pairs (bp) sequenced for reads in a fastq file.","","","","False","","","","int error","integer" -"plateID","An identifier assigned to a multi-well plate. Certain data types and assays profile samples using multi-well plates. 
Knowing which samples were profiled on each plate is important for establishing sample provenance, finding the right data files for a specific set of samples, as well as downstream exploratory data analysis and QC work particularly regarding identification and correction of batch effects.","","","","False","","","","","string" -"PMCID","Pubmed Central Identifier, formatted as a compact URI string that will automatically resolve into a URL based on the Synapse bioregistry; e.g., pmc:PMCxxxxxxxx","","","","False","","","","regex search ^pmc:PMC[0-9]{8} error","string" -"processedDataType","A label used for file annotations to provide a brief description of the processed data file.","barcode counts, differential expression results, epigenomic peaks, gene counts","","","False","","","","list like error","string" -"eventCount","The total number of events detected and captured in the corresponding FCS file. In cytometry, an event corresponds to particles detected and measured by the instrument. This number corresponds to the number of rows in the FCS file and is expected to be a whole integer number.","","","","False","","","","int error","integer" -"notes","Please use this attribute to captured pertinent details not otherwise captured. This is an unstructured text entry, please be concise.","","","","False","","","","","string" -"species","The genus species of sample or subject origin.","Homo sapiens","","","True","","","","","string" -"treatment","A short descriptive label indicating what experimental treatments have been applied to a biospecimen before data capture. Please include the word 'control' to indicate specific treatments that are intended to serve as a control group.","","","","True","","","","","string" -"treatmentTimepoint","Where applicable, specify the timepoint relative to treatment to distinguish different sets of samples that have undergone the same treatment but for different lengths of time. 
REQUIRED: please specify the unit of time, e.g., secs, mins, hrs, days, etc.","","","","False","","","","","string" -"slideID","A distinct label or name, unique within an experiment, assigned to an imaging slides.","","","","True","","","","","string" -"altSampleID","An alternate identifier for a sample. With some assays there can be default or alternate sample identifiers entered into the data collection software. If you will be uploading data collected with an alternate identifier please provide that here. In some cases, the ARK BDM team will use this field to capture original but out-dated identifiers for samples.","","","","False","","","","","string" -"sampleCollectionBatch","A label indicating batching of sample collection or experiment execution that occurs prior to data collection.","","","","False","","","","","string" -"associatedAccession","This is a File and Dataset annotation attribute indicating additional accessions (i.e., unique identifiers) associated with the data when the data has also been submitted to or can be found in other repositories such as GEO, SRA, dbGaP, etc.","","","","False","","","","list like error","string" +"Attribute","Description","Valid Values","DependsOn","Required","Parent","Source","Validation Rules","columnType","Format","Minimum","Maximum","Pattern" +"Spatial Imaging Assay Metadata Template","A template outlining metadata to be collected for each slide in a spatial transcriptomic or imaging based dataset.","","Component, assay, specimenModality, sampleProcessingBatch, dataCollectionBatch, targetPanel, targetPanelSynID, targetPanelSize","","","","","","","","","" +"Spatial Imaging File Annotation Template","A data contributor template outlining metadata to be collected as file annotations for imaging-based data files.","","Component, fileFormat, assay, resourceType, specimenModality","","","","","","","","","" +"BDM Spatial Imaging File Annotations","A template outlining as file annotations for imaging-based data files to 
be compiled by ARK BDM.","","Component, fileFormat, assay, resourceType, specimenModality, parentBiospecimenID, biospecimenType, biospecimenSubtype, dataType, dataSubtype, diagnosis, visitID, targetPanel, targetPanelSynID, targetPanelSize, program, project, species, primaryCellSource, cellType, userDefinedCellType","","","","","","","","","" +"single specimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","biospecimenID","","","","","","","","","" +"multispecimen","Is a valid value of `specimenModality` that triggers conditional dependencies for additional attributes.","","slideID","","","","","","","","","" +"resourceType","High-level classification of the file content","experimental data, metadata","","TRUE","","","list like error","string_list","","","","" +"assay","The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.","Xenium, CosMX, GenePS SeqFISH, imaging mass cytometry, Visium, H&E, serial IHC","","TRUE","","","","string","","","","" +"rds","Is a valid value of 'fileFormat' that triggers conditional dependencies for additional attributes.","","RObjectClass","FALSE","","","","","","","","" +"experimental data","Is a valid value of 'resourceType' that triggers conditional dependencies for additional attributes.","","processedDataType","FALSE","","","","","","","","" +"metadata","Is a valid value of Resource Type that triggers conditional dependencies for additional attributes. 
Any file that contains curated data describing an experiment and experimental-derived data, including metadata about study subjects, biospecimens, protocols, assay reagents, marker panels, and ID mappings","","metadataType","FALSE","","","","","","","","" +"metadataType","A label further classifying the content of metadata resource.","single-cell metadata, cell coordinates, target panel, tissue microarray map, other","","TRUE","","","","string","","","","" +"AMP RA/SLE","Accelerating Medicines Partnership Rheumatoid Arthritis and Systemic Lupus Erythematosus. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","FALSE","","","","","","","","" +"AMP AIM","Accelerating Medicines Partnership Autoimmune and Immune-Mediated Diseases. Is a valid value for 'program' that triggers conditional dependencies for additional attributes.","","visitID","FALSE","","","","","","","","" +"Component","A high-level attribute for grouping attributes into templates.","","","True","","","","string","","","","" +"fileFormat","Standard file format name or file extension","bai, bam, bed, bim, csv, czi, docx, dose, erate, fam, fastq, fcs, geojson, h5, h5ad, info, mcd, mtx, parquet, pdf, py, rds, rec, svs, tbi, tgz, tsv, txt, vcf, xls, xlsx, zip","","True","","","","string","","","","" +"program","Name of the funding program that supported the generation of data and associated files","AMP AIM, AMP RA/SLE, Community Contribution","","True","","","","string","","","","" +"project","A sub-level attribute of `program` specifying a research initiative working to investigate particular hypotheses.","AIM for RA, ELLIPSS, LOCKIT, RA, SLE, STAMP, UMass V-CoRT","","True","","","list like error","string_list","","","","" +"programPhase","A label noting which AMP RA/SLE program phase generated the data.","I, II","","True","","","list like error","string_list","","","","" +"dataType","High-level classification of the type of data contained in the 
file, loosely related to the experimental method or biological entity that is being profiled. Select all that apply using a comma-delimited list, though in most cases only a single label is expected. For multimodal datasets with concomitant profiling of biospecimen include 'multimodal'.","cytometry, epigenomics, genomics, histology, immune repertoire profiling, immunostaining, lipidomics, metabolomics, microbiome, multimodal, proteomics, transcriptomics","","True","","","list like error","string_list","","","","" +"dataSubtype","General classification to differentiate between omics profiling modalities. If N/A please select 'none'. Multiple selections can be provided in a comma-delimited list, however this is largely only expected in the context of Datasets and files that contain integrated experimental data spanning multiple types.","bulk, none, pseudobulk, single-cell, single-nucleus, spatial","","True","","","list like error","string_list","","","","" +"dataLevel","Level of data processing applied to file. Levels refer to pre-defined standards of processing for the given assay.","1, 2, 3, 4, 5","","True","","","","string","","","","" +"10xProbeSetReference","Name of probe set used in 10x Chromium Flex, Xenium, or Visium data. If custom modified probe set was used the probe reference should be included as metadata accompanying the experimental data files.","Flex Human Transcriptome Probe Set v1.0.1, Flex Human Transcriptome Probe Set v1.1.0, Visium Human Transcriptome v1, Visium Human Transcriptome v2, custom probe set","","True","","","","string","","","","" +"platform","The specific version (manufacturer, model, etc.) of a technology that is used to carry out a laboratory or computational experiment. Specify where applicable for experimental data files, else enter 'none'. 
In most cases only a single label is expected, however multiple selections can be provided in comma-delimited list where applicable e.g., for 10x Genomics fastq files please specify both the 10x instrument and the sequencing platform.","BD FACSAria Fusion cell sorter, BD FACSAria III, BD FACSCanto, BD FACSCanto II, BD FACSDiscover A8, BD FACSDiscover S8, BD FACSLyric Clinical, BD FACSMelody, BD FACSymphony S6, BD LSRFortessa, Chromium Controller, Chromium GEM-X Single Cell 3' Chip v4, Chromium Next GEM Chip G, Chromium Next GEM Chip H, Chromium Next GEM Chip K, Chromium Next GEM Chip M, Chromium Next GEM Chip Q, Chromium X, Chromium Xo, Chromium iX, CyTOF XT, Cytek Aurora, Cytek Aurora Evo, Fluidigm BioMark, GEM-X Flex Gene Expression Chip, GEM-X OCM 5' Chip, Helios Mass Cytometer, Hyperion, Illumina HiSeq 2500, Illumina HiSeq X Ten, Illumina NextSeq 500, Illumina NovaSeq 6000, Illumina NovaSeq X, Not Applicable, Olink Signature Q100, Sony MA900, Thermo Fisher Attune CytPix, Thermo Fisher Attune NxT, Thermo Fisher Attune Xenith, Visium CytAssist, Xenium, none, unknown","","True","","","list like error","string_list","","","","" +"specimenModality","Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens","multispecimen, single specimen, unknown","","True","","","","string","","","","" +"individualID","Unique identifier assigned to each study participant. For multi-specimen data files provide all IDs in a comma-separated list.","","","True","","","#ClinicalMetadataTemplate unique error^^#BiospecimenMetadataTemplate str^^list like error","string_list","","","","" +"biospecimenID","A unique identifier assigned to specimens collected from study participants. 
For multi-specimen data files provide all IDs in a comma-separated list.","","","True","","","#BiospecimenMetadataTemplate unique error^^list like error","string_list","","","","" +"parentBiospecimenID","The biospecimenID associated with the originating biospecimen for derived or child biospecimens.","","","False","","","","string","","","","" +"biospecimenType","A label indicating the biological material collected for experimentation and data collection. Where applicable, provide all types in a comma-separated list.","PBMCs, cell line, fibroblast-like synoviocyte, kidney biopsy, none, plasma, primary cell culture, saliva, salivary gland, serum, skin biopsy, skin swab, stool, suction blister cells, suction blister fluid, synovial fluid, synovial tissue, total leukocytes, urine, uvea, whole blood","","True","","","#BiospecimenMetadataTemplate str^^#InVitroBiospecimenMetadataTemplate^^list like error","string_list","","","","" +"primaryCellSource","A label indicating the biological source material from which a primary cell culture was derived.","PBMCs, kidney, pannus-derived dermis, pannus-derived epidermis, salivary gland, synovial tissue, total leukocytes, urine, uvea, whole blood","","True","","","","string","","","","" +"codingLanguage","The coding, aka programming, language(s) contained in the file marked with `resourceType = code`. 
Select all that apply.","C, C#, C++, Fortan, Java, Julia, Matlab, Python, R, Ruby, SAS","","True","","","","string","","","","" +"visitID","Ordinal ID distinguishing different patient visits.","","","True","","","","string","","","","" +"softwareAndVersion","Relevant software and version used to generate the data file.","BD FACSDiva 8.0.1, Cell Ranger 9.0.1, Cell Ranger ATAC v1.1.0, Cell Ranger v3.0.0, Cell Ranger v3.0.1, Cell Ranger v3.0.2, Cell Ranger v3.1.0, Cell Ranger v4.0.0, Cell Ranger v5.0.0, Cell Ranger v5.0.1, Cell Ranger v6.0.0, Cell Ranger v6.0.1, Cell Ranger v6.0.2, Cell Ranger v6.1.0, Cell Ranger v6.1.1, Cell Ranger v6.1.2, Cell Ranger v7.0.0, Cell Ranger v7.0.1, Cell Ranger v7.1.0, Cell Ranger v7.2.0, Cell Ranger v8.0.0, Cell Ranger v8.0.1, Cell Ranger v9.0.0, Space Ranger 3.0.0, Space Ranger 3.0.1, Space Ranger 3.1.0, Space Ranger 3.1.1, Space Ranger 3.1.2, Space Ranger 3.1.3, demuxlet","","False","","","","string","","","","" +"targetPanel","A unique or established human-readable name assigned to the panel of targets profiled in the experiment. For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.","","","True","","","","string","","","","" +"targetPanelSize","The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).","","","False","","","int error","integer","","","","" +"nucleicAcidSource","The source of the nucleic acid used as input for sequencing library fragments. 
Select all that apply, though in most cases only a single label is expected.","BCR mRNA, CRISPR protospacer feature barcode, TCR mRNA, Tn5-accessible gDNA, antigen capture barcode, gDNA, globin-depleted RNA, intracellular protein feature barcode, multiplexing oligo, poly(A) RNA, rRNA-depleted RNA, surface protein feature barcode","","True","","","list like error","string_list","","","","" +"biospecimenSubtype","Biospecimen status before sample is processed into a scRNA-seq library. Several scRNA-seq technologies support a variety of sample processing methods which can introduce sources of technical variation.","FFPE tissue, PFA-fixed tissue, cell or tissue lysate, cell suspension, flow-sorted cells, fresh tissue, frozen tissue, nuclei suspension, supernatant","","False","","","","string","","","","" +"cellRangerOutput","10x Genomics Cell Ranger software outputs several different counts results and formats, some with different processing applied. This label distinguishes between these types and is particularly helpful when multiple files are uploaded with the sample name, e.g., barcodes.tsv.gz","Not Applicable, filtered MEX, filtered_feature_bc_matrix, filtered_peak_bc_matrix, raw MEX, raw_feature_bc_matrix, raw_peak_bc_matrix","","True","","","","string","","","","" +"alignmentReference","The genomic/transcriptomic reference used for performing read alignment against.","10x Cell Ranger Human GRCh38 2020-A, 10x Cell Ranger Human GRCh38 2024-A, GRCh38, modified GRCh38, unknown, vdj_GRCh38_alts_ensembl-4.0.0","","True","","","","string","","","","" +"libraryPrepMethod","Sequencing library preparation method or kit used to create the library.
If no commercially available kit was used, please select 'in-house library prep'.","10x Chromium Fixed RNA Human Transcriptome, 10x Chromium GEM-X Single Cell 3' v4, 10x Chromium GEM-X Single Cell 5' v3, 10x Chromium Next GEM Single Cell 3', 10x Chromium Next GEM Single Cell 3' 3.1, 10x Chromium Next GEM Single Cell 5' v1.1, 10x Chromium Next GEM Single Cell 5' v2, 10x Chromium Next GEM Single Cell ATAC v2, 10x Chromium Single Cell Human BCR, 10x Chromium Single Cell Human TCR, 10x GEM-X Flex Gene Expression Human, 10x GEM-X Universal 5' Gene Expression v3, CEL-Seq2, Chromium Next GEM Single Cell ATAC v1.1, Fluidigm C1 HT, NEBNext Human Immune Sequencing Kit, NEBNext Ultra II Directional RNA Library, Nextera XT, Nextera XT DNA, QIAseq miRNA Library, SMART-Seq Human BCR with UMI, SMART-Seq Human TCR with UMI, SMART-Seq v4 Ultra Low Input RNA, SMARTer Stranded Total RNA v2, Takara Human BCR profiling for Illumina, Takara Human TCR profiling for Illumina, Takara Human TCRv2 profiling for Illumina, Takara Human scTCR profiling for Illumina, TruSeq Stranded mRNA, custom DASH-treatment, in-house library prep","","True","","","","string","","","","" +"datasetType","High-level classification of dataset entity distinguishing between datasets compiled for a specific publication or as a general data resource.","experimental, publication","","True","","","","string","","","","" +"acknowledgmentStatement","A Dataset-specific attribute specifying the path to the wiki subpage within the ARK Portal - backend project that contains the acknowledgement statement that must be included in publications using data from the given dataset as a stipulation of the conditions of use.","syn26710600/wiki/619685","","True","","","","string","","","","" +"ARKRelease","A Dataset-specific attribute specifying the ARK Portal release in which this dataset was first made available to the public.","1.0, 2.0, 2024.06.R1, 2024.07.R1, 2024.08.R1, 2024.09.R1, 2024.10.R1, 2024.12.R1, 2025.01.R1, 2025.02.R1, 
2025.03.R1, 2025.04.R1, 2025.05.R1, 2025.06.R1, 2025.07.R1, 2025.08.R1, 2025.09.R1, 2025.10.R1, 2025.11.R1, 2025.12.R1","","True","","","","string","","","","" +"RObjectClass","Rds files store R objects, one per file. This label details the class of the R object saved to the Rds file or other similar file types.","ROCR prediction.object, Seurat object, SummarizedExperiment, Symphony reference, data.frame, list, matrix, sparse matrix, vector","","False","","","","string","","","","" +"diagnosis","A high-level classifier indicating the disease status of an individual.","At-Risk RA, Not Applicable, OA, RA, SLE, Sjogren's disease, control, cutaneous lupus erythematosus, dermatomyositis, discoid lupus erythematosus, lupus nephritis, psoriasis, psoriatic arthritis, scleroderma, unknown, vitiligo","","True","","","list like error","string_list","","","","" +"libraryID","A library label or name, unique within an experiment, used to distinguish sequencing libraries.","","","True","","","","string","","","","" +"associatedCodeURL","A URL to the repository where associated code is available.","","","False","","","","string","uri","","","" +"FMAID","Functional Model of Anatomy ontology ID corresponding to the anatomical site and origin of the biospecimen. This attribute is assigned by ARK Portal data managers.","","","True","","https://bioportal.bioontology.org/ontologies/FMA/?p=summary","int error","integer","","","","" +"skinSiteStatus","Disease manifestation status of skin biospecimen.","healthy control, lesional, lesional proximal, non-lesional","","True","","","","string","","","","" +"salivaCollectionProcedure","Classification of saliva collection procedure. 
This is a conditionally dependent attribute triggered for saliva `biospecimenType`.","stimulated, unstimulated","","True","","","","string","","","","" +"synovialCollectionProcedure","Classification of procedure for synovial tissue collection.","arthroplasty, biopsy, synovectomy, unknown","","True","","","","string","","","","" +"publicationType","General classification of publication.","correction, peer-reviewed, pre-print","","True","","","","string","","","","" +"title","Title of the publication.","","","True","","","","string","","","","" +"journal","Journal in which the publication was released","","","True","","","","string","","","","" +"publicationDate","The publication date extracted from PubMed database","","","True","","","","string","","","","" +"DOI","Digital object identifier","","","True","","","","string","uri-reference","","","" +"anatomicalSite","The anatomical site, i.e., location on or within the body, from which the biospecimen was collected. This attribute is required depending on the value selected for biospecimenType.","left 2nd MCP joint, left ankle joint, left hip joint, left knee joint, left wrist joint, other site, right 1st MTP joint, right 2nd MCP joint, right 2nd MTP joint, right 3rd MCP joint, right ankle joint, right hip joint, right knee joint, right wrist joint, unknown","","True","","","","string","","","","" +"vitiligoPattern","A high-level classification of vitiligo based on the distribution and patterning of lesions across the body.","mixed, non-segmental, segmental, unclassified","","False","","","","string","","","","" +"vitiligoPhenotype","Classification of vitiligo lesions which correlate with autoimmune activity and result in specific skin and depigmentation manifestations at the lesion site. If N/A please select 'none'. 
Multiple selections can be provided in a comma-delimited list.","active, confetti, inflammatory, none, trichrome","","True","","","list like error","string_list","","","","" +"psoriasisType","General type classification of psoriasis disease manifestation.","erythrodermic, guttate, inverse, plaque, pustular","","False","","","","string","","","","" +"ageDiagnosis","Age at which subject was diagnosed with `diagnosis`. If providing this value be sure the unit matches that used for `age`.","","","False","","","num error","number","","","","" +"age","Age at which subject was enrolled in study or age at corresponding visit and data collection event. If value unknown, enter '-1'.","","","True","","","num error","number","","","","" +"ageUnits","The unit of measure used for `ageEnrollment` and `ageDiagnosis`","months, years","","True","","","","string","","","","" +"sex","A textual description of a person's sex at birth.","female, intersex, male, unknown","","True","","","","string","","","","" +"height","Standing height of subject.","","","True","","","num error","number","","","","" +"race","A textual description of a person's race.","American Indian or Alaska Native, Asian, Black or African American, Hispanic, Mixed Race, Native Hawaiian or Other Pacific Islander, White, other, unknown","","False","","","","string","","","","" +"ethnicity","The ethnicity of a person.","Hispanic or Latino, Not Hispanic or Latino, unknown","","False","","","","string","","","","" +"heightUnits","Unit of measure of value provided for `height`.","centimeters, feet, inches, meters","","True","","","","string","","","","" +"weight","Weight of subject. 
If value unknown, enter '-1'.","","","True","","","num error","number","","","","" +"weightUnits","Abbreviated unit of measure of value provided for `weight`.","g, kg, lb, oz","","True","","","","string","","","","" +"comorbidities","Any diseases or medical condition that is simultaneously present in addition to `diagnosis`.","Hashimoto's Thyroiditis, autoimmune thyroid disease, cardiovascular disease, diabetes, inflammatory bowel disease, multiple sclerosis, other, psoriasis, psoriatic arthritis, pulmonary disease, rheumatoid arthritis, systemic lupus erythematosus","","False","","","list like error","string_list","","","","" +"diabetesType","Type of diabetes mellitus.","gestational, type 1, type 2, unknown","","False","","","","string","","","","" +"VIDA","Vitiligo Disease Activity Score, a six-point scale that evaluates the activity of vitiligo","","","False","","","","string","","","","" +"VASI","Total body Vitiligo Area Severity Index, a measure of the percentage of vitiligo involvement across the body calculated in terms of hand units.","","","False","","","","string","","","","" +"VETI","Vitiligo Extent Tensity Index measures the extent of vitiligo by a numerical score and combines analysis of extensity and severity of vitiligo to produce a constant and reproducible number.","","","False","","https://doi.org/10.5826/dpc.0404a18","num error","number","","","","" +"PASI","Psoriasis Area and Severity Index, is a measurement of the discoloration, thickness, scaling, and coverage of psoriasis plaques.","","","False","","","","string","","","","" +"CDASI","Cutaneous Dermatomyositis Disease Area and Severity Index, is a measurement used to characterize cutaneous dermatomyositis severity.","","","False","","","","string","","","","" +"inputCellCount","An estimate of the number of cells expected to be sequenced in a library. 
Software that process single-cell sequencing data often include options for users to specify this value to improve processing results.","","","True","","","int error","integer","","","","" +"totalReads","Total number of reads sequenced from the library.","","","True","","","int error","integer","","","","" +"FACSPopulation","A description of the marker gating strategy used to derive the population cells with FACS.","","","True","","","","string","","","","" +"cellType","The cell type name from Cell Ontology for the corresponding CL identifier.","","","False","","","","string","","","","" +"userDefinedCellType","User-defined label of a cell type. Contributors are provided this option to use preferred or custom cell type labels that may vary from options and standards set by Cell Ontology.","","","False","","","","string","","","","" +"krennInflammatory","A standardized, semi-quantitative measure of the degree of inflammatory infiltrate in synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"krennLining","A standardized, semi-quantitative measure of the degree of hyperplasia/enlargement of the synovial lining layer of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"krennStroma","A standardized, semi-quantitative measure of stromal cell density of synovial specimen. 
This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"krennSynovitisScore","The Krenn Synovitis Score (KSS) is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.","","","False","","https://doi.org/10.1111/j.1365-2559.2006.02508.x","num error","number","","","","" +"datasetStatus","A categorical label indicating the status of an ARK Portal dataset. This is applied to improve downstream management of datasets as well as various ETL workflows.","deprecated, released, test, under peer review, unreleased","","True","","","","string","","","","" +"sampleProcessingBatch","A label indicating batching of sample processing or preparation that occurs prior to data collection.","","","False","","","","string","","","","" +"dataCollectionBatch","A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.","","","False","","","","string","","","","" +"metadataStandards","Metadata standards used to generate the metadata","ARK data model, user-defined","","False","","","","string","","","","" +"skinSunExposure","For skin-based biospecimen, this attribute indicates whether the sample was collected from an anatomical site that does or does not receive routine sun exposure.","not sun exposed, sun exposed","","False","","","","string","","","","" +"sequencingSpikeIn","Pre-made sequencing libraries may be added to your sequencing run to improve sequencing results. 
If such a 'spike-in' library was used, this attribute specifies the name of the library and the percentage of the spike-in library used in the sequencing run.","","","False","","","","string","","","","" +"librarySpikeIn","Pre-made collections of nucleic acid fragments can be added to samples during the library construction process to improve downstream quantification and statistical analyses. If such a 'spike-in' was used, this attribute specifies the name and manufacturer of the spike-in.","","","False","","","","string","","","","" +"readLength","The number of base pairs (bp) sequenced for reads in a fastq file.","","","False","","","int error","integer","","","","" +"plateID","An identifier assigned to a multi-well plate. Certain data types and assays profile samples using multi-well plates. Knowing which samples were profiled on each plate is important for establishing sample provenance, finding the right data files for a specific set of samples, as well as downstream exploratory data analysis and QC work particularly regarding identification and correction of batch effects.","","","False","","","","string","","","","" +"processedDataType","A label used for file annotations to provide a brief description of the processed data file.","barcode counts, differential expression results, epigenomic peaks, gene counts","","False","","","list like error","string_list","","","","" +"eventCount","The total number of events detected and captured in the corresponding FCS file. In cytometry, an event corresponds to particles detected and measured by the instrument. This number corresponds to the number of rows in the FCS file and is expected to be a whole integer number.","","","False","","","int error","integer","","","","" +"notes","Please use this attribute to capture pertinent details not otherwise captured.
This is an unstructured text entry, please be concise.","","","False","","","","string","","","","" +"species","The genus species of sample or subject origin.","Homo sapiens","","True","","","","string","","","","" +"treatment","A short descriptive label indicating what experimental treatments have been applied to a biospecimen before data capture. Please include the word 'control' to indicate specific treatments that are intended to serve as a control group.","","","True","","","","string","","","","" +"treatmentTimepoint","Where applicable, specify the timepoint relative to treatment to distinguish different sets of samples that have undergone the same treatment but for different lengths of time. REQUIRED: please specify the unit of time, e.g., secs, mins, hrs, days, etc.","","","False","","","","string","","","","" +"slideID","A distinct label or name, unique within an experiment, assigned to an imaging slide.","","","True","","","","string","","","","" +"altSampleID","An alternate identifier for a sample. With some assays there can be default or alternate sample identifiers entered into the data collection software. If you will be uploading data collected with an alternate identifier please provide that here. In some cases, the ARK BDM team will use this field to capture original but out-dated identifiers for samples.","","","False","","","","string","","","","" +"sampleCollectionBatch","A label indicating batching of sample collection or experiment execution that occurs prior to data collection.","","","False","","","","string","","","","" +"associatedAccession","This is a File and Dataset annotation attribute indicating additional accessions (i.e., unique identifiers) associated with the data when the data has also been submitted to or can be found in other repositories such as GEO, SRA, dbGaP, etc.","","","False","","","list like error","string_list","","","","" +"percentCellViability","A measure of the proportion of viable cells within a cell suspension. 
Scale is 0-100.","","","True","","","inRange 50 100 error","integer","","50","100","" +"sequencingSaturation","A measure of the fraction of library complexity that was sequenced in a library. This metric quantifies the fraction of reads originating from an already-observed UMI. More specifically, this is the fraction of confidently mapped, valid cell-barcode, valid UMI reads that are non-unique. Scale is 0-1.","","","False","","https://kb.10xgenomics.com/hc/en-us/articles/115005062366-What-is-sequencing-saturation","inRange 0 1 error","number","","0","1","" +"publicationSynID","The synID of the corresponding Synapse entity that stores metadata about the publication. This is used to differentiate publication-specific files, often consisting of level 4 processed data and expanded subject metadata, in a publication dataset that also includes raw or minimally processed files from experimental datasets. This provides an easy way to distinguish and select for the publication-specific data from which the research findings were derived. When this attribute is used to annotate a Dataset it serves as a way to directly link the Dataset entity with the publication metadata stored in Synapse.","","","False","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"associatedDataset","The synID of a Dataset entity. This serves to link other Synapse entities to Dataset entities. When used to annotate a publication Dataset this attribute should include the synID for an experimental Datasets from which the publication data was derived. Multiple synID can be specified using a comma-delimited list.","","","False","","","list like::regex search ^syn[0-9]{8} error","string_list","","","","^syn[0-9]{8}" +"custom10xProbeSetSynID","If custom modified probe sets were used for collecting 10x Chromium Flex scRNA-seq data, then the probe reference files should be grouped as a zip or tar archive and uploaded as metadata. 
This attribute links the experimental data back to the probe set archive file via a synapse ID.","","","True","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"targetPanelSynID","In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. This attribute links experimental data files to the target panel metadata via the synapse ID of that file.","","","True","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"datasetDescription","A Dataset-specific attribute specifying the synID of the folder that contains a wiki write-up of the dataset description. This wiki content will be surfaced on the ARK Portal frontend site.","","","True","","","regex search ^syn[0-9]{8} error","string","","","","^syn[0-9]{8}" +"DOID","Disease ontology identifier associated with `diagnosis`. Attribute values are applied by ARK Portal data managers.","","","True","","","regex search ^DOID error","string","","","","^DOID" +"BRENDA","BRENDA Tissue and Enzyme Source Ontology ID (BTO) corresponding to the `biospecimenType`. 
Attribute values are applied by ARK Portal data managers.","","","True","","https://bioportal.bioontology.org/ontologies/BTO","regex search ^BTO error","string","","","","^BTO" +"year","Year (YYYY) in which the paper was published.","","","True","","","regex search [1-2][0-9]{3} error","string","","","","[1-2][0-9]{3}" +"PMID","PubMed(R) Identifier","","","True","","","regex search ^PMID error","string","","","","^PMID" +"ImmPortAccession","Accession to corresponding information in ImmPort.","","","False","","","regex search ^SDY error","string","","","","^SDY" +"cellOntologyID","Cell Ontology CL identifier that best describes a biospecimen used to generate data, e.g., CL:0000084 for T cell.","","","False","","","regex search ^CL: error","string","","","","^CL:" +"PMCID","Pubmed Central Identifier, formatted as a compact URI string that will automatically resolve into a URL based on the Synapse bioregistry; e.g., pmc:PMCxxxxxxxx","","","False","","","regex search ^pmc:PMC[0-9]{8} error","string","","","","^pmc:PMC[0-9]{8}" diff --git a/model_json_schema/ark.BiospecimenMetadataTemplate.schema.json b/model_json_schema/ark.BiospecimenMetadataTemplate.schema.json index 5b65cadd..a76e58de 100644 --- a/model_json_schema/ark.BiospecimenMetadataTemplate.schema.json +++ b/model_json_schema/ark.BiospecimenMetadataTemplate.schema.json @@ -1,1152 +1,1080 @@ { + "$id": "http://example.com/BiospecimenMetadataTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", - "properties": { - "notes": {}, - "program": { - "enum": [ - "AMP AIM", - "Community Contribution", - "AMP RA/SLE" - ] - }, - "project": { - "type": "array", - "items": { - "enum": [ - "UMass V-CoRT", - "RA", - "LOCKIT", - "AIM for RA", - "ELLIPSS", - "STAMP", - "SLE" - ] - }, - "maxItems": 7 - }, - "biospecimenType": { - "enum": [ - "salivary gland", - "fibroblast-like synoviocyte", - "suction blister cells", - "PBMCs", - "none", - 
"saliva", - "skin swab", - "skin biopsy", - "primary cell culture", - "urine", - "synovial tissue", - "whole blood", - "kidney biopsy", - "cell line", - "synovial fluid", - "plasma", - "uvea", - "suction blister fluid", - "serum", - "stool", - "total leukocytes" - ] - }, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "biospecimenID": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "individualID": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "altSampleID": {}, - "parentBiospecimenID": {}, - "sampleCollectionBatch": {}, - "biospecimenSubtype": { - "enum": [ - "nuclei suspension", - "cell or tissue lysate", - "FFPE tissue", - "fresh tissue", - "flow-sorted cells", - "frozen tissue", - "cell suspension", - "PFA-fixed tissue", - "supernatant", - "" - ] - }, - "visitID": {}, - "skinSiteStatus": { - "enum": [ - "healthy control", - "lesional proximal", - "lesional", - "non-lesional", - "" - ] - }, - "anatomicalSite": { - "enum": [ - "right wrist joint", - "right 1st MTP joint", - "right 2nd MTP joint", - "right 2nd MCP joint", - "left hip joint", - "right 3rd MCP joint", - "right hip joint", - "left ankle joint", - "left wrist joint", - "right ankle joint", - "right knee joint", - "left 2nd MCP joint", - "unknown", - "other site", - "left knee joint", - "" - ] - }, - "salivaCollectionProcedure": { - "enum": [ - "unstimulated", - "stimulated", - "" - ] - }, - "primaryCellSource": { - "enum": [ - "salivary gland", - "pannus-derived epidermis", - "uvea", - "PBMCs", - "synovial tissue", - "kidney", - "whole blood", - "pannus-derived dermis", - "urine", - "total leukocytes", - "" - ] - }, - "cellType": {}, - "cellOntologyID": {}, - "krennLining": {}, - "krennInflammatory": {}, - "synovialCollectionProcedure": { - "enum": [ - "unknown", - "biopsy", - "synovectomy", - "arthroplasty", - "" - ] - }, - "krennStroma": {}, - "krennSynovitisScore": {}, - "userDefinedCellType": {}, - "FACSPopulation": {} - }, - "required": [ - 
"program", - "project", - "biospecimenType", - "Component", - "biospecimenID", - "individualID" - ], "allOf": [ { "if": { "properties": { - "program": { + "BiospecimenSubtype": { "enum": [ - "AMP AIM" + "Cellsuspension" ] } - }, - "required": [ - "program" - ] + } }, "then": { "properties": { - "visitID": { + "CellOntologyID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "visitID" + "CellOntologyID" ] } }, { "if": { "properties": { - "program": { + "BiospecimenSubtype": { "enum": [ - "AMP RA/SLE" + "Flow-sortedcells" ] } - }, - "required": [ - "program" - ] + } }, "then": { "properties": { - "visitID": { + "CellOntologyID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "visitID" + "CellOntologyID" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "suction blister cells" + "Cellline" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "skinSiteStatus": { - "enum": [ - "healthy control", - "lesional proximal", - "lesional", - "non-lesional" - ] + "CellOntologyID": { + "not": { + "type": "null" + } } }, "required": [ - "skinSiteStatus" + "CellOntologyID" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "skin swab" + "Primarycellculture" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "skinSiteStatus": { - "enum": [ - "healthy control", - "lesional proximal", - "lesional", - "non-lesional" - ] + "CellOntologyID": { + "not": { + "type": "null" + } } }, "required": [ - "skinSiteStatus" + "CellOntologyID" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenSubtype": { "enum": [ - "skin biopsy" + "Cellsuspension" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "skinSiteStatus": { - "enum": [ - "healthy control", - "lesional proximal", - "lesional", - "non-lesional" - ] + "CellType": { + "not": { + "type": "null" + } } 
}, "required": [ - "skinSiteStatus" + "CellType" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenSubtype": { "enum": [ - "suction blister fluid" + "Flow-sortedcells" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "skinSiteStatus": { - "enum": [ - "healthy control", - "lesional proximal", - "lesional", - "non-lesional" - ] + "CellType": { + "not": { + "type": "null" + } } }, "required": [ - "skinSiteStatus" + "CellType" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "suction blister cells" + "Cellline" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "anatomicalSite": { - "enum": [ - "right wrist joint", - "right 1st MTP joint", - "right 2nd MTP joint", - "right 2nd MCP joint", - "left hip joint", - "right 3rd MCP joint", - "right hip joint", - "left ankle joint", - "left wrist joint", - "right ankle joint", - "right knee joint", - "left 2nd MCP joint", - "unknown", - "other site", - "left knee joint" - ] + "CellType": { + "not": { + "type": "null" + } } }, "required": [ - "anatomicalSite" + "CellType" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "skin swab" + "Primarycellculture" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "anatomicalSite": { - "enum": [ - "right wrist joint", - "right 1st MTP joint", - "right 2nd MTP joint", - "right 2nd MCP joint", - "left hip joint", - "right 3rd MCP joint", - "right hip joint", - "left ankle joint", - "left wrist joint", - "right ankle joint", - "right knee joint", - "left 2nd MCP joint", - "unknown", - "other site", - "left knee joint" - ] + "CellType": { + "not": { + "type": "null" + } } }, "required": [ - "anatomicalSite" + "CellType" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenSubtype": { "enum": [ - "skin biopsy" + "Cellsuspension" ] } - }, - "required": [ - "biospecimenType" - 
] + } }, "then": { "properties": { - "anatomicalSite": { - "enum": [ - "right wrist joint", - "right 1st MTP joint", - "right 2nd MTP joint", - "right 2nd MCP joint", - "left hip joint", - "right 3rd MCP joint", - "right hip joint", - "left ankle joint", - "left wrist joint", - "right ankle joint", - "right knee joint", - "left 2nd MCP joint", - "unknown", - "other site", - "left knee joint" - ] + "UserDefinedCellType": { + "not": { + "type": "null" + } } }, "required": [ - "anatomicalSite" + "UserDefinedCellType" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenSubtype": { "enum": [ - "synovial tissue" + "Flow-sortedcells" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "anatomicalSite": { - "enum": [ - "right wrist joint", - "right 1st MTP joint", - "right 2nd MTP joint", - "right 2nd MCP joint", - "left hip joint", - "right 3rd MCP joint", - "right hip joint", - "left ankle joint", - "left wrist joint", - "right ankle joint", - "right knee joint", - "left 2nd MCP joint", - "unknown", - "other site", - "left knee joint" - ] + "UserDefinedCellType": { + "not": { + "type": "null" + } } }, "required": [ - "anatomicalSite" + "UserDefinedCellType" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenSubtype": { "enum": [ - "synovial fluid" + "Flow-sortedcells" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "anatomicalSite": { - "enum": [ - "right wrist joint", - "right 1st MTP joint", - "right 2nd MTP joint", - "right 2nd MCP joint", - "left hip joint", - "right 3rd MCP joint", - "right hip joint", - "left ankle joint", - "left wrist joint", - "right ankle joint", - "right knee joint", - "left 2nd MCP joint", - "unknown", - "other site", - "left knee joint" - ] + "FACSPopulation": { + "not": { + "type": "null" + } } }, "required": [ - "anatomicalSite" + "FACSPopulation" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - 
"suction blister fluid" + "Primarycellculture" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "anatomicalSite": { - "enum": [ - "right wrist joint", - "right 1st MTP joint", - "right 2nd MTP joint", - "right 2nd MCP joint", - "left hip joint", - "right 3rd MCP joint", - "right hip joint", - "left ankle joint", - "left wrist joint", - "right ankle joint", - "right knee joint", - "left 2nd MCP joint", - "unknown", - "other site", - "left knee joint" - ] + "PrimaryCellSource": { + "not": { + "type": "null" + } } }, "required": [ - "anatomicalSite" + "PrimaryCellSource" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "saliva" + "Saliva" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "salivaCollectionProcedure": { - "enum": [ - "unstimulated", - "stimulated" - ] + "SalivaCollectionProcedure": { + "not": { + "type": "null" + } } }, "required": [ - "salivaCollectionProcedure" + "SalivaCollectionProcedure" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "primary cell culture" + "Skinbiopsy" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "primaryCellSource": { - "enum": [ - "salivary gland", - "pannus-derived epidermis", - "uvea", - "PBMCs", - "synovial tissue", - "kidney", - "whole blood", - "pannus-derived dermis", - "urine", - "total leukocytes" - ] + "AnatomicalSite": { + "not": { + "type": "null" + } } }, "required": [ - "primaryCellSource" + "AnatomicalSite" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "primary cell culture" + "Skinswab" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "cellType": {} + "AnatomicalSite": { + "not": { + "type": "null" + } + } }, "required": [ - "cellType" + "AnatomicalSite" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - 
"cell line" + "Suctionblistercells" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "cellType": {} + "AnatomicalSite": { + "not": { + "type": "null" + } + } }, "required": [ - "cellType" + "AnatomicalSite" ] } }, { "if": { "properties": { - "biospecimenSubtype": { + "BiospecimenType": { "enum": [ - "flow-sorted cells" + "Suctionblisterfluid" ] } - }, - "required": [ - "biospecimenSubtype" - ] + } }, "then": { "properties": { - "cellType": {} + "AnatomicalSite": { + "not": { + "type": "null" + } + } }, "required": [ - "cellType" + "AnatomicalSite" ] } }, { "if": { "properties": { - "biospecimenSubtype": { + "BiospecimenType": { "enum": [ - "cell suspension" + "Synovialfluid" ] } - }, - "required": [ - "biospecimenSubtype" - ] + } }, "then": { "properties": { - "cellType": {} + "AnatomicalSite": { + "not": { + "type": "null" + } + } }, "required": [ - "cellType" + "AnatomicalSite" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "primary cell culture" + "Synovialtissue" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "cellOntologyID": {} + "AnatomicalSite": { + "not": { + "type": "null" + } + } }, "required": [ - "cellOntologyID" + "AnatomicalSite" ] } }, { "if": { "properties": { - "biospecimenType": { + "PrimaryCellSource": { "enum": [ - "cell line" + "Synovialtissue" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "cellOntologyID": {} + "AnatomicalSite": { + "not": { + "type": "null" + } + } }, "required": [ - "cellOntologyID" + "AnatomicalSite" ] } }, { "if": { "properties": { - "biospecimenSubtype": { + "BiospecimenType": { "enum": [ - "flow-sorted cells" + "Skinbiopsy" ] } - }, - "required": [ - "biospecimenSubtype" - ] + } }, "then": { "properties": { - "cellOntologyID": {} + "SkinSiteStatus": { + "not": { + "type": "null" + } + } }, "required": [ - "cellOntologyID" + "SkinSiteStatus" ] } }, { "if": { 
"properties": { - "biospecimenSubtype": { + "BiospecimenType": { "enum": [ - "cell suspension" + "Skinswab" ] } - }, - "required": [ - "biospecimenSubtype" - ] + } }, "then": { "properties": { - "cellOntologyID": {} + "SkinSiteStatus": { + "not": { + "type": "null" + } + } }, "required": [ - "cellOntologyID" + "SkinSiteStatus" ] } }, { "if": { "properties": { - "primaryCellSource": { + "BiospecimenType": { "enum": [ - "synovial tissue" + "Suctionblistercells" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "krennLining": {} + "SkinSiteStatus": { + "not": { + "type": "null" + } + } }, "required": [ - "krennLining" + "SkinSiteStatus" ] } }, { "if": { "properties": { - "primaryCellSource": { + "BiospecimenType": { "enum": [ - "synovial tissue" + "Suctionblisterfluid" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "krennLining": {} + "SkinSiteStatus": { + "not": { + "type": "null" + } + } }, "required": [ - "krennLining" + "SkinSiteStatus" ] } }, { "if": { "properties": { - "primaryCellSource": { + "BiospecimenType": { "enum": [ - "synovial tissue" + "Synovialfluid" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "krennInflammatory": {} + "SynovialCollectionProcedure": { + "not": { + "type": "null" + } + } }, "required": [ - "krennInflammatory" + "SynovialCollectionProcedure" ] } }, { "if": { "properties": { - "primaryCellSource": { + "BiospecimenType": { "enum": [ - "synovial tissue" + "Synovialtissue" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "krennInflammatory": {} + "SynovialCollectionProcedure": { + "not": { + "type": "null" + } + } }, "required": [ - "krennInflammatory" + "SynovialCollectionProcedure" ] } }, { "if": { "properties": { - "primaryCellSource": { + "PrimaryCellSource": { "enum": [ - "synovial tissue" + "Synovialtissue" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { 
"properties": { - "synovialCollectionProcedure": { - "enum": [ - "unknown", - "biopsy", - "synovectomy", - "arthroplasty" - ] + "SynovialCollectionProcedure": { + "not": { + "type": "null" + } } }, "required": [ - "synovialCollectionProcedure" + "SynovialCollectionProcedure" ] } }, { "if": { "properties": { - "primaryCellSource": { + "BiospecimenType": { "enum": [ - "synovial tissue" + "Synovialtissue" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "synovialCollectionProcedure": { - "enum": [ - "unknown", - "biopsy", - "synovectomy", - "arthroplasty" - ] + "KrennInflammatory": { + "not": { + "type": "null" + } } }, "required": [ - "synovialCollectionProcedure" + "KrennInflammatory" ] } }, { "if": { "properties": { - "biospecimenType": { + "PrimaryCellSource": { "enum": [ - "synovial fluid" + "Synovialtissue" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "synovialCollectionProcedure": { - "enum": [ - "unknown", - "biopsy", - "synovectomy", - "arthroplasty" - ] + "KrennInflammatory": { + "not": { + "type": "null" + } } }, "required": [ - "synovialCollectionProcedure" + "KrennInflammatory" ] } }, { "if": { "properties": { - "primaryCellSource": { + "BiospecimenType": { "enum": [ - "synovial tissue" + "Synovialtissue" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "krennStroma": {} + "KrennLining": { + "not": { + "type": "null" + } + } }, "required": [ - "krennStroma" + "KrennLining" ] } }, { "if": { "properties": { - "primaryCellSource": { + "PrimaryCellSource": { "enum": [ - "synovial tissue" + "Synovialtissue" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "krennStroma": {} + "KrennLining": { + "not": { + "type": "null" + } + } }, "required": [ - "krennStroma" + "KrennLining" ] } }, { "if": { "properties": { - "primaryCellSource": { + "BiospecimenType": { "enum": [ - "synovial tissue" + "Synovialtissue" ] } - }, - 
"required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "krennSynovitisScore": {} + "KrennStroma": { + "not": { + "type": "null" + } + } }, "required": [ - "krennSynovitisScore" + "KrennStroma" ] } }, { "if": { "properties": { - "primaryCellSource": { + "PrimaryCellSource": { "enum": [ - "synovial tissue" + "Synovialtissue" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "krennSynovitisScore": {} + "KrennStroma": { + "not": { + "type": "null" + } + } }, "required": [ - "krennSynovitisScore" + "KrennStroma" ] } }, { "if": { "properties": { - "biospecimenSubtype": { + "BiospecimenType": { "enum": [ - "flow-sorted cells" + "Synovialtissue" ] } - }, - "required": [ - "biospecimenSubtype" - ] + } }, "then": { "properties": { - "userDefinedCellType": {} + "KrennSynovitisScore": { + "not": { + "type": "null" + } + } }, "required": [ - "userDefinedCellType" + "KrennSynovitisScore" ] } }, { "if": { "properties": { - "biospecimenSubtype": { + "PrimaryCellSource": { "enum": [ - "cell suspension" + "Synovialtissue" ] } - }, - "required": [ - "biospecimenSubtype" - ] + } }, "then": { "properties": { - "userDefinedCellType": {} + "KrennSynovitisScore": { + "not": { + "type": "null" + } + } }, "required": [ - "userDefinedCellType" + "KrennSynovitisScore" ] } }, { "if": { "properties": { - "biospecimenSubtype": { + "Program": { "enum": [ - "flow-sorted cells" + "AMPAIM" ] } + } + }, + "then": { + "properties": { + "VisitID": { + "not": { + "type": "null" + } + } }, "required": [ - "biospecimenSubtype" + "VisitID" ] + } + }, + { + "if": { + "properties": { + "Program": { + "enum": [ + "AMPRA/SLE" + ] + } + } }, "then": { "properties": { - "FACSPopulation": { + "VisitID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "FACSPopulation" + "VisitID" ] } } - ] + ], + "description": "A general template outlining metadata to be collected for biospecimen profiled in a dataset.", + "properties": { + 
"AltSampleID": { + "description": "An alternate identifier for a sample. With some assays there can be default or alternate sample identifiers entered into the data collection software. If you will be uploading data collected with an alternate identifier please provide that here. In some cases, the ARK BDM team will use this field to capture original but out-dated identifiers for samples.", + "title": "altSampleID", + "type": "string" + }, + "AnatomicalSite": { + "description": "The anatomical site, i.e., location on or within the body, from which the biospecimen was collected. This attribute is required depending on the value selected for biospecimenType.", + "enum": [ + "left 2nd MCP joint", + "left ankle joint", + "left hip joint", + "left knee joint", + "left wrist joint", + "other site", + "right 1st MTP joint", + "right 2nd MCP joint", + "right 2nd MTP joint", + "right 3rd MCP joint", + "right ankle joint", + "right hip joint", + "right knee joint", + "right wrist joint", + "unknown" + ], + "title": "anatomicalSite" + }, + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.", + "title": "biospecimenID", + "type": "string" + }, + "BiospecimenSubtype": { + "description": "Biospecimen status before sample is processed into a scRNA-seq library. Several scRNA-seq technologies support a variety of sample processing methods which can introduces sources of technical variation.", + "enum": [ + "FFPE tissue", + "PFA-fixed tissue", + "cell or tissue lysate", + "cell suspension", + "flow-sorted cells", + "fresh tissue", + "frozen tissue", + "nuclei suspension", + "supernatant" + ], + "title": "biospecimenSubtype" + }, + "BiospecimenType": { + "description": "A label indicating the biological material collected for experimentation and data collection. 
Where applicable, provide all types in a comma-separated list.", + "enum": [ + "PBMCs", + "cell line", + "fibroblast-like synoviocyte", + "kidney biopsy", + "none", + "plasma", + "primary cell culture", + "saliva", + "salivary gland", + "serum", + "skin biopsy", + "skin swab", + "stool", + "suction blister cells", + "suction blister fluid", + "synovial fluid", + "synovial tissue", + "total leukocytes", + "urine", + "uvea", + "whole blood" + ], + "title": "biospecimenType" + }, + "CellOntologyID": { + "description": "Cell Ontology CL identifier that best describes a biospecimen used to generate data, e.g., CL:0000084 for T cell.", + "pattern": "^CL:", + "title": "cellOntologyID", + "type": "string" + }, + "CellType": { + "description": "The cell type name from Cell Ontology for the corresponding CL identifier.", + "title": "cellType", + "type": "string" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "FACSPopulation": { + "description": "A description of the marker gating strategy used to derive the population cells with FACS.", + "title": "FACSPopulation", + "type": "string" + }, + "IndividualID": { + "description": "Unique identifier assigned to each study participant. For multi-specimen data files provide all IDs in a comma-separated list.", + "title": "individualID", + "type": "string" + }, + "KrennInflammatory": { + "description": "A standardized, semi-quantitative measure of the degree of inflammatory infiltrate in synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. 
If value unknown, enter '-1'.", + "title": "krennInflammatory", + "type": "number" + }, + "KrennLining": { + "description": "A standardized, semi-quantitative measure of the degree of hyperplasia/enlargement of the synovial lining layer of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.", + "title": "krennLining", + "type": "number" + }, + "KrennStroma": { + "description": "A standardized, semi-quantitative measure of stromal cell density of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.", + "title": "krennStroma", + "type": "number" + }, + "KrennSynovitisScore": { + "description": "The Krenn Synovitis Score (KSS) is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.", + "title": "krennSynovitisScore", + "type": "number" + }, + "Notes": { + "description": "Please use this attribute to capture pertinent details not otherwise captured. 
This is an unstructured text entry, please be concise.", + "title": "notes", + "type": "string" + }, + "ParentBiospecimenID": { + "description": "The biospecimenID associated with the originating biospecimen for derived or child biospecimens.", + "title": "parentBiospecimenID", + "type": "string" + }, + "PrimaryCellSource": { + "description": "A label indicating the biological source material from which a primary cell culture was derived.", + "enum": [ + "PBMCs", + "kidney", + "pannus-derived dermis", + "pannus-derived epidermis", + "salivary gland", + "synovial tissue", + "total leukocytes", + "urine", + "uvea", + "whole blood" + ], + "title": "primaryCellSource" + }, + "Program": { + "description": "Name of the funding program that supported the generation of data and associated files", + "enum": [ + "AMP AIM", + "AMP RA/SLE", + "Community Contribution" + ], + "title": "program" + }, + "Project": { + "description": "A sub-level attribute of `program` specifying a research initiative working to investigate particular hypotheses.", + "enum": [ + "AIM for RA", + "ELLIPSS", + "LOCKIT", + "RA", + "SLE", + "STAMP", + "UMass V-CoRT" + ], + "title": "project" + }, + "SalivaCollectionProcedure": { + "description": "Classification of saliva collection procedure. 
This is a conditionally dependent attribute triggered for saliva `biospecimenType`.", + "enum": [ + "stimulated", + "unstimulated" + ], + "title": "salivaCollectionProcedure" + }, + "SampleCollectionBatch": { + "description": "A label indicating batching of sample collection or experiment execution that occurs prior to data collection.", + "title": "sampleCollectionBatch", + "type": "string" + }, + "SkinSiteStatus": { + "description": "Disease manifestation status of skin biospecimen.", + "enum": [ + "healthy control", + "lesional", + "lesional proximal", + "non-lesional" + ], + "title": "skinSiteStatus" + }, + "SynovialCollectionProcedure": { + "description": "Classification of procedure for synovial tissue collection.", + "enum": [ + "arthroplasty", + "biopsy", + "synovectomy", + "unknown" + ], + "title": "synovialCollectionProcedure" + }, + "UserDefinedCellType": { + "description": "User-defined label of a cell type. Contributors are provided this option to use preferred or custom cell type labels that may vary from options and standards set by Cell Ontology.", + "title": "userDefinedCellType", + "type": "string" + }, + "VisitID": { + "description": "Ordinal ID distinguishing different patient visits.", + "title": "visitID", + "type": "string" + } + }, + "required": [ + "BiospecimenID", + "BiospecimenType", + "Component", + "IndividualID", + "Program", + "Project" + ], + "title": "BiospecimenMetadataTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.BulkATAC-seqAssayMetadataTemplate.schema.json b/model_json_schema/ark.BulkATAC-seqAssayMetadataTemplate.schema.json index eb7004a2..ba138499 100644 --- a/model_json_schema/ark.BulkATAC-seqAssayMetadataTemplate.schema.json +++ b/model_json_schema/ark.BulkATAC-seqAssayMetadataTemplate.schema.json @@ -1,330 +1,347 @@ { + "$id": "http://example.com/BulkATAC-seqAssayMetadataTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - 
"title": "ark", - "type": "object", - "properties": { - "totalReads": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "specimenModality": { - "enum": [ - "unknown", - "single specimen", - "multispecimen" - ] - }, - "softwareAndVersion": { - "enum": [ - "Cell Ranger v6.0.1", - "Space Ranger 3.1.0", - "Cell Ranger v5.0.1", - "Cell Ranger v9.0.0", - "Cell Ranger v6.0.0", - "Cell Ranger v7.1.0", - "Cell Ranger v3.0.1", - "Space Ranger 3.1.1", - "Cell Ranger v5.0.0", - "Cell Ranger v6.1.2", - "Cell Ranger v8.0.1", - "Cell Ranger ATAC v1.1.0", - "Space Ranger 3.0.1", - "Space Ranger 3.1.3", - "BD FACSDiva 8.0.1", - "Cell Ranger v7.2.0", - "demuxlet", - "Cell Ranger v6.1.0", - "Cell Ranger v3.0.0", - "Cell Ranger v7.0.0", - "Cell Ranger v4.0.0", - "Cell Ranger v3.1.0", - "Space Ranger 3.0.0", - "Cell Ranger 9.0.1", - "Cell Ranger v7.0.1", - "Cell Ranger v6.1.1", - "Cell Ranger v6.0.2", - "Cell Ranger v8.0.0", - "Space Ranger 3.1.2", - "Cell Ranger v3.0.2", - "" - ] - }, - "libraryPrepMethod": { - "enum": [ - "SMART-Seq Human BCR with UMI", - "NEBNext Human Immune Sequencing Kit", - "10x Chromium Next GEM Single Cell 5' v1.1", - "Chromium Next GEM Single Cell ATAC v1.1", - "10x Chromium GEM-X Single Cell 5' v3", - "10x Chromium Fixed RNA Human Transcriptome", - "QIAseq miRNA Library", - "CEL-Seq2", - "Nextera XT", - "Takara Human BCR profiling for Illumina", - "Takara Human TCRv2 profiling for Illumina", - "SMART-Seq v4 Ultra Low Input RNA", - "custom DASH-treatment", - "10x Chromium Next GEM Single Cell ATAC v2", - "10x Chromium GEM-X Single Cell 3' v4", - "Takara Human scTCR profiling for Illumina", - "in-house library prep", - "10x GEM-X Flex Gene Expression Human", - "10x Chromium Next GEM Single Cell 3'", - "Fluidigm C1 HT", - "TruSeq Stranded mRNA", - "SMARTer Stranded Total RNA v2", - "10x Chromium Single Cell Human TCR", - "10x Chromium Next GEM Single Cell 3' 3.1", - "10x Chromium Single Cell Human BCR", - "Nextera XT DNA", - "10x Chromium Next GEM 
Single Cell 5' v2", - "SMART-Seq Human TCR with UMI", - "Takara Human TCR profiling for Illumina", - "10x GEM-X Universal 5' Gene Expression v3", - "NEBNext Ultra II Directional RNA Library" - ] - }, - "dataCollectionBatch": {}, - "nucleicAcidSource": { - "type": "array", - "items": { - "enum": [ - "TCR mRNA", - "intracellular protein feature barcode", - "Tn5-accessible gDNA", - "antigen capture barcode", - "multiplexing oligo", - "poly(A) RNA", - "rRNA-depleted RNA", - "CRISPR protospacer feature barcode", - "BCR mRNA", - "gDNA", - "surface protein feature barcode", - "globin-depleted RNA" - ] - }, - "maxItems": 12 - }, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "sampleProcessingBatch": {}, - "assay": { - "type": "array", - "items": { - "enum": [ - "Xenium", - "snRNASeq", - "Visium", - "WGS", - "feature barcode sequencing", - "imaging mass cytometry", - "LC-MS/MS", - "NULISA", - "VDJSeq", - "Olink Target 96", - "WES", - "kiloplex", - "ASAPSeq", - "flow cytometry", - "snATACSeq", - "SomaScan", - "GenePS SeqFISH", - "Olink Reveal", - "Olink Explore HT", - "RNASeq", - "Olink Target 48", - "imaging mass spectrometry", - "CITESeq", - "CyTOF", - "SNP array", - "scVDJSeq", - "scRNASeq", - "CE-MS", - "Olink Flex", - "CosMX", - "serial IHC", - "Olink Focus", - "multiplexed ELISA", - "H&E" - ] - }, - "maxItems": 34 - }, - "alignmentReference": { - "enum": [ - "10x Cell Ranger Human GRCh38 2024-A", - "vdj_GRCh38_alts_ensembl-4.0.0", - "modified GRCh38", - "GRCh38", - "unknown", - "10x Cell Ranger Human GRCh38 2020-A" - ] - }, - "platform": { - "type": "array", - "items": { - "enum": [ - "Chromium Xo", - "Illumina NextSeq 500", - "Chromium GEM-X Single Cell 3' Chip v4", - "BD FACSMelody", - "Xenium", - "Chromium Next GEM Chip G", - "Chromium X", - "Fluidigm BioMark", - "BD FACSLyric Clinical", - "unknown", - "GEM-X Flex Gene Expression Chip", - "Helios Mass Cytometer", - "BD FACSAria Fusion cell sorter", - "Hyperion", - "BD FACSDiscover S8", - 
"Chromium Next GEM Chip M", - "Cytek Aurora", - "Not Applicable", - "CyTOF XT", - "Illumina HiSeq X Ten", - "Illumina NovaSeq X", - "Olink Signature Q100", - "Illumina HiSeq 2500", - "BD FACSDiscover A8", - "BD FACSymphony S6", - "Illumina NovaSeq 6000", - "Chromium Next GEM Chip K", - "none", - "Thermo Fisher Attune NxT", - "Chromium Next GEM Chip Q", - "BD FACSAria III", - "Chromium Controller", - "Chromium iX", - "BD FACSCanto", - "Thermo Fisher Attune Xenith", - "Thermo Fisher Attune CytPix", - "Chromium Next GEM Chip H", - "Cytek Aurora Evo", - "BD LSRFortessa", - "Visium CytAssist", - "GEM-X OCM 5' Chip", - "Sony MA900", - "BD FACSCanto II" - ] - }, - "maxItems": 43 - }, - "biospecimenID": {}, - "libraryID": {}, - "10xProbeSetReference": { - "enum": [ - "custom probe set", - "Flex Human Transcriptome Probe Set v1.0.1", - "Visium Human Transcriptome v1", - "Visium Human Transcriptome v2", - "Flex Human Transcriptome Probe Set v1.1.0", - "" - ] - } - }, - "required": [ - "totalReads", - "specimenModality", - "libraryPrepMethod", - "nucleicAcidSource", - "Component", - "assay", - "alignmentReference", - "platform" - ], "allOf": [ { "if": { "properties": { - "specimenModality": { + "LibraryPrepMethod": { "enum": [ - "single specimen" + "10xGEM-XFlexGeneExpressionHuman" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "biospecimenID": { + "10xProbeSetReference": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "biospecimenID" + "10xProbeSetReference" ] } }, { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "multispecimen" + "Multispecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "libraryID": { + "LibraryID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "libraryID" + "LibraryID" ] } }, { "if": { "properties": { - "libraryPrepMethod": { + "SpecimenModality": { "enum": [ - "10x GEM-X Flex Gene 
Expression Human" + "Singlespecimen" ] } - }, - "required": [ - "libraryPrepMethod" - ] + } }, "then": { "properties": { - "10xProbeSetReference": { - "enum": [ - "custom probe set", - "Flex Human Transcriptome Probe Set v1.0.1", - "Visium Human Transcriptome v1", - "Visium Human Transcriptome v2", - "Flex Human Transcriptome Probe Set v1.1.0" - ] + "BiospecimenID": { + "not": { + "type": "null" + } } }, "required": [ - "10xProbeSetReference" + "BiospecimenID" ] } } - ] + ], + "description": "A template outlining metadata to be collected for each library in a bulk ATAC-seq dataset.", + "properties": { + "10xProbeSetReference": { + "description": "Name of probe set used in 10x Chromium Flex, Xenium, or Visium data. If custom modified probe set was used the probe reference should be included as metadata accompanying the experimental data files.", + "enum": [ + "Flex Human Transcriptome Probe Set v1.0.1", + "Flex Human Transcriptome Probe Set v1.1.0", + "Visium Human Transcriptome v1", + "Visium Human Transcriptome v2", + "custom probe set" + ], + "title": "10xProbeSetReference" + }, + "AlignmentReference": { + "description": "The genomic/transcriptomic reference used for performing read alignment against.", + "enum": [ + "10x Cell Ranger Human GRCh38 2020-A", + "10x Cell Ranger Human GRCh38 2024-A", + "GRCh38", + "modified GRCh38", + "unknown", + "vdj_GRCh38_alts_ensembl-4.0.0" + ], + "title": "alignmentReference" + }, + "Assay": { + "description": "The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. 
e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.", + "items": { + "enum": [ + "ASAPSeq", + "CE-MS", + "CITESeq", + "CosMX", + "CyTOF", + "GenePS SeqFISH", + "H&E", + "LC-MS/MS", + "NULISA", + "Olink Explore HT", + "Olink Flex", + "Olink Focus", + "Olink Reveal", + "Olink Target 48", + "Olink Target 96", + "RNASeq", + "SNP array", + "SomaScan", + "VDJSeq", + "Visium", + "WES", + "WGS", + "Xenium", + "feature barcode sequencing", + "flow cytometry", + "imaging mass cytometry", + "imaging mass spectrometry", + "kiloplex", + "multiplexed ELISA", + "scRNASeq", + "scVDJSeq", + "serial IHC", + "snATACSeq", + "snRNASeq" + ], + "type": "string" + }, + "title": "assay", + "type": "array" + }, + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.", + "items": { + "type": "string" + }, + "title": "biospecimenID", + "type": "array" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "DataCollectionBatch": { + "description": "A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.", + "title": "dataCollectionBatch", + "type": "string" + }, + "LibraryID": { + "description": "A library label or name, unique within an experiment, used to distinguish sequencing libraries.", + "title": "libraryID", + "type": "string" + }, + "LibraryPrepMethod": { + "description": "Sequencing library preparation method or kit used to create the library. 
If no commercially available kit was used, please select 'in-house library prep'.", + "enum": [ + "10x Chromium Fixed RNA Human Transcriptome", + "10x Chromium GEM-X Single Cell 3' v4", + "10x Chromium GEM-X Single Cell 5' v3", + "10x Chromium Next GEM Single Cell 3'", + "10x Chromium Next GEM Single Cell 3' 3.1", + "10x Chromium Next GEM Single Cell 5' v1.1", + "10x Chromium Next GEM Single Cell 5' v2", + "10x Chromium Next GEM Single Cell ATAC v2", + "10x Chromium Single Cell Human BCR", + "10x Chromium Single Cell Human TCR", + "10x GEM-X Flex Gene Expression Human", + "10x GEM-X Universal 5' Gene Expression v3", + "CEL-Seq2", + "Chromium Next GEM Single Cell ATAC v1.1", + "Fluidigm C1 HT", + "NEBNext Human Immune Sequencing Kit", + "NEBNext Ultra II Directional RNA Library", + "Nextera XT", + "Nextera XT DNA", + "QIAseq miRNA Library", + "SMART-Seq Human BCR with UMI", + "SMART-Seq Human TCR with UMI", + "SMART-Seq v4 Ultra Low Input RNA", + "SMARTer Stranded Total RNA v2", + "Takara Human BCR profiling for Illumina", + "Takara Human TCR profiling for Illumina", + "Takara Human TCRv2 profiling for Illumina", + "Takara Human scTCR profiling for Illumina", + "TruSeq Stranded mRNA", + "custom DASH-treatment", + "in-house library prep" + ], + "title": "libraryPrepMethod" + }, + "NucleicAcidSource": { + "description": "The source of the nucleic acid used as input for sequencing library fragments. Select all that apply, though in most cases only a single label is expected.", + "items": { + "enum": [ + "BCR mRNA", + "CRISPR protospacer feature barcode", + "TCR mRNA", + "Tn5-accessible gDNA", + "antigen capture barcode", + "gDNA", + "globin-depleted RNA", + "intracellular protein feature barcode", + "multiplexing oligo", + "poly(A) RNA", + "rRNA-depleted RNA", + "surface protein feature barcode" + ], + "type": "string" + }, + "title": "nucleicAcidSource", + "type": "array" + }, + "Platform": { + "description": "The specific version (manufacturer, model, etc.) 
of a technology that is used to carry out a laboratory or computational experiment. Specify where applicable for experimental data files, else enter 'none'. In most cases only a single label is expected, however multiple selections can be provided in comma-delimited list where applicable e.g., for 10x Genomics fastq files please specify both the 10x instrument and the sequencing platform.", + "items": { + "enum": [ + "BD FACSAria Fusion cell sorter", + "BD FACSAria III", + "BD FACSCanto", + "BD FACSCanto II", + "BD FACSDiscover A8", + "BD FACSDiscover S8", + "BD FACSLyric Clinical", + "BD FACSMelody", + "BD FACSymphony S6", + "BD LSRFortessa", + "Chromium Controller", + "Chromium GEM-X Single Cell 3' Chip v4", + "Chromium Next GEM Chip G", + "Chromium Next GEM Chip H", + "Chromium Next GEM Chip K", + "Chromium Next GEM Chip M", + "Chromium Next GEM Chip Q", + "Chromium X", + "Chromium Xo", + "Chromium iX", + "CyTOF XT", + "Cytek Aurora", + "Cytek Aurora Evo", + "Fluidigm BioMark", + "GEM-X Flex Gene Expression Chip", + "GEM-X OCM 5' Chip", + "Helios Mass Cytometer", + "Hyperion", + "Illumina HiSeq 2500", + "Illumina HiSeq X Ten", + "Illumina NextSeq 500", + "Illumina NovaSeq 6000", + "Illumina NovaSeq X", + "Not Applicable", + "Olink Signature Q100", + "Sony MA900", + "Thermo Fisher Attune CytPix", + "Thermo Fisher Attune NxT", + "Thermo Fisher Attune Xenith", + "Visium CytAssist", + "Xenium", + "none", + "unknown" + ], + "type": "string" + }, + "title": "platform", + "type": "array" + }, + "SampleProcessingBatch": { + "description": "A label indicating batching of sample processing or preparation that occurs prior to data collection.", + "title": "sampleProcessingBatch", + "type": "string" + }, + "SoftwareAndVersion": { + "description": "Relevant software and version used to generate the data file.", + "enum": [ + "BD FACSDiva 8.0.1", + "Cell Ranger 9.0.1", + "Cell Ranger ATAC v1.1.0", + "Cell Ranger v3.0.0", + "Cell Ranger v3.0.1", + "Cell Ranger v3.0.2", + "Cell 
Ranger v3.1.0", + "Cell Ranger v4.0.0", + "Cell Ranger v5.0.0", + "Cell Ranger v5.0.1", + "Cell Ranger v6.0.0", + "Cell Ranger v6.0.1", + "Cell Ranger v6.0.2", + "Cell Ranger v6.1.0", + "Cell Ranger v6.1.1", + "Cell Ranger v6.1.2", + "Cell Ranger v7.0.0", + "Cell Ranger v7.0.1", + "Cell Ranger v7.1.0", + "Cell Ranger v7.2.0", + "Cell Ranger v8.0.0", + "Cell Ranger v8.0.1", + "Cell Ranger v9.0.0", + "Space Ranger 3.0.0", + "Space Ranger 3.0.1", + "Space Ranger 3.1.0", + "Space Ranger 3.1.1", + "Space Ranger 3.1.2", + "Space Ranger 3.1.3", + "demuxlet" + ], + "title": "softwareAndVersion" + }, + "SpecimenModality": { + "description": "Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens", + "enum": [ + "multispecimen", + "single specimen", + "unknown" + ], + "title": "specimenModality" + }, + "TotalReads": { + "description": "Total number of reads sequenced from the library.", + "title": "totalReads", + "type": "integer" + } + }, + "required": [ + "AlignmentReference", + "Assay", + "Component", + "LibraryPrepMethod", + "NucleicAcidSource", + "Platform", + "SpecimenModality", + "TotalReads" + ], + "title": "BulkATAC-seqAssayMetadataTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.BulkRNA-seqAssayMetadataTemplate.schema.json b/model_json_schema/ark.BulkRNA-seqAssayMetadataTemplate.schema.json index 551e6af0..57ab5092 100644 --- a/model_json_schema/ark.BulkRNA-seqAssayMetadataTemplate.schema.json +++ b/model_json_schema/ark.BulkRNA-seqAssayMetadataTemplate.schema.json @@ -1,330 +1,347 @@ { + "$id": "http://example.com/BulkRNA-seqAssayMetadataTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", - "properties": { - "dataCollectionBatch": {}, - "sampleProcessingBatch": {}, - "specimenModality": { - "enum": [ - "unknown", - "single specimen", - "multispecimen" - ] - }, - 
"Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "libraryPrepMethod": { - "enum": [ - "Nextera XT", - "Chromium Next GEM Single Cell ATAC v1.1", - "Takara Human TCR profiling for Illumina", - "10x GEM-X Universal 5' Gene Expression v3", - "10x Chromium Next GEM Single Cell 3'", - "NEBNext Human Immune Sequencing Kit", - "10x Chromium Single Cell Human TCR", - "10x Chromium Next GEM Single Cell 5' v1.1", - "10x Chromium Next GEM Single Cell 3' 3.1", - "TruSeq Stranded mRNA", - "custom DASH-treatment", - "SMART-Seq Human BCR with UMI", - "10x Chromium GEM-X Single Cell 3' v4", - "Takara Human TCRv2 profiling for Illumina", - "SMART-Seq v4 Ultra Low Input RNA", - "SMARTer Stranded Total RNA v2", - "SMART-Seq Human TCR with UMI", - "Nextera XT DNA", - "Takara Human BCR profiling for Illumina", - "in-house library prep", - "10x Chromium Next GEM Single Cell ATAC v2", - "NEBNext Ultra II Directional RNA Library", - "CEL-Seq2", - "Fluidigm C1 HT", - "QIAseq miRNA Library", - "10x GEM-X Flex Gene Expression Human", - "10x Chromium GEM-X Single Cell 5' v3", - "Takara Human scTCR profiling for Illumina", - "10x Chromium Single Cell Human BCR", - "10x Chromium Fixed RNA Human Transcriptome", - "10x Chromium Next GEM Single Cell 5' v2" - ] - }, - "platform": { - "type": "array", - "items": { - "enum": [ - "Visium CytAssist", - "BD FACSCanto II", - "BD FACSMelody", - "Chromium Xo", - "Helios Mass Cytometer", - "Not Applicable", - "Chromium iX", - "Thermo Fisher Attune NxT", - "none", - "BD FACSAria III", - "BD FACSymphony S6", - "BD FACSCanto", - "Chromium Next GEM Chip H", - "Xenium", - "Cytek Aurora Evo", - "Cytek Aurora", - "Chromium Next GEM Chip M", - "GEM-X Flex Gene Expression Chip", - "BD FACSAria Fusion cell sorter", - "Olink Signature Q100", - "GEM-X OCM 5' Chip", - "Illumina NextSeq 500", - "Thermo Fisher Attune Xenith", - "unknown", - "Hyperion", - "BD FACSDiscover A8", - "Chromium Next GEM Chip G", - "Chromium Next GEM Chip Q", - "Chromium 
X", - "Illumina NovaSeq X", - "Illumina HiSeq X Ten", - "Fluidigm BioMark", - "BD FACSLyric Clinical", - "CyTOF XT", - "Sony MA900", - "Thermo Fisher Attune CytPix", - "Illumina NovaSeq 6000", - "Chromium GEM-X Single Cell 3' Chip v4", - "BD LSRFortessa", - "BD FACSDiscover S8", - "Chromium Controller", - "Illumina HiSeq 2500", - "Chromium Next GEM Chip K" - ] - }, - "maxItems": 43 - }, - "softwareAndVersion": { - "enum": [ - "BD FACSDiva 8.0.1", - "Cell Ranger v8.0.0", - "Cell Ranger v6.0.0", - "Cell Ranger v6.1.1", - "Space Ranger 3.0.1", - "Space Ranger 3.1.2", - "Cell Ranger v7.0.0", - "Cell Ranger v6.1.0", - "Cell Ranger v5.0.1", - "Cell Ranger ATAC v1.1.0", - "Cell Ranger v3.1.0", - "demuxlet", - "Cell Ranger v7.2.0", - "Cell Ranger v3.0.0", - "Space Ranger 3.0.0", - "Cell Ranger v9.0.0", - "Cell Ranger 9.0.1", - "Cell Ranger v7.1.0", - "Cell Ranger v4.0.0", - "Cell Ranger v3.0.2", - "Cell Ranger v3.0.1", - "Cell Ranger v6.0.1", - "Cell Ranger v5.0.0", - "Space Ranger 3.1.0", - "Cell Ranger v6.0.2", - "Cell Ranger v6.1.2", - "Cell Ranger v7.0.1", - "Cell Ranger v8.0.1", - "Space Ranger 3.1.3", - "Space Ranger 3.1.1", - "" - ] - }, - "totalReads": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "alignmentReference": { - "enum": [ - "vdj_GRCh38_alts_ensembl-4.0.0", - "modified GRCh38", - "GRCh38", - "unknown", - "10x Cell Ranger Human GRCh38 2024-A", - "10x Cell Ranger Human GRCh38 2020-A" - ] - }, - "nucleicAcidSource": { - "type": "array", - "items": { - "enum": [ - "gDNA", - "Tn5-accessible gDNA", - "TCR mRNA", - "intracellular protein feature barcode", - "rRNA-depleted RNA", - "CRISPR protospacer feature barcode", - "BCR mRNA", - "poly(A) RNA", - "antigen capture barcode", - "surface protein feature barcode", - "multiplexing oligo", - "globin-depleted RNA" - ] - }, - "maxItems": 12 - }, - "assay": { - "type": "array", - "items": { - "enum": [ - "CyTOF", - "Visium", - "imaging mass cytometry", - "SomaScan", - "GenePS SeqFISH", - "Olink Target 96", 
- "Olink Flex", - "RNASeq", - "Olink Target 48", - "Xenium", - "Olink Explore HT", - "Olink Focus", - "ASAPSeq", - "snRNASeq", - "imaging mass spectrometry", - "H&E", - "feature barcode sequencing", - "serial IHC", - "scVDJSeq", - "scRNASeq", - "LC-MS/MS", - "SNP array", - "flow cytometry", - "NULISA", - "WES", - "WGS", - "multiplexed ELISA", - "kiloplex", - "snATACSeq", - "CITESeq", - "CE-MS", - "Olink Reveal", - "CosMX", - "VDJSeq" - ] - }, - "maxItems": 34 - }, - "biospecimenID": {}, - "libraryID": {}, - "10xProbeSetReference": { - "enum": [ - "custom probe set", - "Flex Human Transcriptome Probe Set v1.0.1", - "Visium Human Transcriptome v2", - "Flex Human Transcriptome Probe Set v1.1.0", - "Visium Human Transcriptome v1", - "" - ] - } - }, - "required": [ - "specimenModality", - "Component", - "libraryPrepMethod", - "platform", - "totalReads", - "alignmentReference", - "nucleicAcidSource", - "assay" - ], "allOf": [ { "if": { "properties": { - "specimenModality": { + "LibraryPrepMethod": { "enum": [ - "single specimen" + "10xGEM-XFlexGeneExpressionHuman" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "biospecimenID": { + "10xProbeSetReference": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "biospecimenID" + "10xProbeSetReference" ] } }, { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "multispecimen" + "Multispecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "libraryID": { + "LibraryID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "libraryID" + "LibraryID" ] } }, { "if": { "properties": { - "libraryPrepMethod": { + "SpecimenModality": { "enum": [ - "10x GEM-X Flex Gene Expression Human" + "Singlespecimen" ] } - }, - "required": [ - "libraryPrepMethod" - ] + } }, "then": { "properties": { - "10xProbeSetReference": { - "enum": [ - "custom probe set", - "Flex Human Transcriptome Probe Set 
v1.0.1", - "Visium Human Transcriptome v2", - "Flex Human Transcriptome Probe Set v1.1.0", - "Visium Human Transcriptome v1" - ] + "BiospecimenID": { + "not": { + "type": "null" + } } }, "required": [ - "10xProbeSetReference" + "BiospecimenID" ] } } - ] + ], + "description": "A template outlining metadata to be collected for each library in a bulk RNA-seq dataset.", + "properties": { + "10xProbeSetReference": { + "description": "Name of probe set used in 10x Chromium Flex, Xenium, or Visium data. If custom modified probe set was used the probe reference should be included as metadata accompanying the experimental data files.", + "enum": [ + "Flex Human Transcriptome Probe Set v1.0.1", + "Flex Human Transcriptome Probe Set v1.1.0", + "Visium Human Transcriptome v1", + "Visium Human Transcriptome v2", + "custom probe set" + ], + "title": "10xProbeSetReference" + }, + "AlignmentReference": { + "description": "The genomic/transcriptomic reference used for performing read alignment against.", + "enum": [ + "10x Cell Ranger Human GRCh38 2020-A", + "10x Cell Ranger Human GRCh38 2024-A", + "GRCh38", + "modified GRCh38", + "unknown", + "vdj_GRCh38_alts_ensembl-4.0.0" + ], + "title": "alignmentReference" + }, + "Assay": { + "description": "The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. 
e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.", + "items": { + "enum": [ + "ASAPSeq", + "CE-MS", + "CITESeq", + "CosMX", + "CyTOF", + "GenePS SeqFISH", + "H&E", + "LC-MS/MS", + "NULISA", + "Olink Explore HT", + "Olink Flex", + "Olink Focus", + "Olink Reveal", + "Olink Target 48", + "Olink Target 96", + "RNASeq", + "SNP array", + "SomaScan", + "VDJSeq", + "Visium", + "WES", + "WGS", + "Xenium", + "feature barcode sequencing", + "flow cytometry", + "imaging mass cytometry", + "imaging mass spectrometry", + "kiloplex", + "multiplexed ELISA", + "scRNASeq", + "scVDJSeq", + "serial IHC", + "snATACSeq", + "snRNASeq" + ], + "type": "string" + }, + "title": "assay", + "type": "array" + }, + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.", + "items": { + "type": "string" + }, + "title": "biospecimenID", + "type": "array" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "DataCollectionBatch": { + "description": "A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.", + "title": "dataCollectionBatch", + "type": "string" + }, + "LibraryID": { + "description": "A library label or name, unique within an experiment, used to distinguish sequencing libraries.", + "title": "libraryID", + "type": "string" + }, + "LibraryPrepMethod": { + "description": "Sequencing library preparation method or kit used to create the library. 
If no commercially available kit was used, please select 'in-house library prep'.", + "enum": [ + "10x Chromium Fixed RNA Human Transcriptome", + "10x Chromium GEM-X Single Cell 3' v4", + "10x Chromium GEM-X Single Cell 5' v3", + "10x Chromium Next GEM Single Cell 3'", + "10x Chromium Next GEM Single Cell 3' 3.1", + "10x Chromium Next GEM Single Cell 5' v1.1", + "10x Chromium Next GEM Single Cell 5' v2", + "10x Chromium Next GEM Single Cell ATAC v2", + "10x Chromium Single Cell Human BCR", + "10x Chromium Single Cell Human TCR", + "10x GEM-X Flex Gene Expression Human", + "10x GEM-X Universal 5' Gene Expression v3", + "CEL-Seq2", + "Chromium Next GEM Single Cell ATAC v1.1", + "Fluidigm C1 HT", + "NEBNext Human Immune Sequencing Kit", + "NEBNext Ultra II Directional RNA Library", + "Nextera XT", + "Nextera XT DNA", + "QIAseq miRNA Library", + "SMART-Seq Human BCR with UMI", + "SMART-Seq Human TCR with UMI", + "SMART-Seq v4 Ultra Low Input RNA", + "SMARTer Stranded Total RNA v2", + "Takara Human BCR profiling for Illumina", + "Takara Human TCR profiling for Illumina", + "Takara Human TCRv2 profiling for Illumina", + "Takara Human scTCR profiling for Illumina", + "TruSeq Stranded mRNA", + "custom DASH-treatment", + "in-house library prep" + ], + "title": "libraryPrepMethod" + }, + "NucleicAcidSource": { + "description": "The source of the nucleic acid used as input for sequencing library fragments. Select all that apply, though in most cases only a single label is expected.", + "items": { + "enum": [ + "BCR mRNA", + "CRISPR protospacer feature barcode", + "TCR mRNA", + "Tn5-accessible gDNA", + "antigen capture barcode", + "gDNA", + "globin-depleted RNA", + "intracellular protein feature barcode", + "multiplexing oligo", + "poly(A) RNA", + "rRNA-depleted RNA", + "surface protein feature barcode" + ], + "type": "string" + }, + "title": "nucleicAcidSource", + "type": "array" + }, + "Platform": { + "description": "The specific version (manufacturer, model, etc.) 
of a technology that is used to carry out a laboratory or computational experiment. Specify where applicable for experimental data files, else enter 'none'. In most cases only a single label is expected, however multiple selections can be provided in comma-delimited list where applicable e.g., for 10x Genomics fastq files please specify both the 10x instrument and the sequencing platform.", + "items": { + "enum": [ + "BD FACSAria Fusion cell sorter", + "BD FACSAria III", + "BD FACSCanto", + "BD FACSCanto II", + "BD FACSDiscover A8", + "BD FACSDiscover S8", + "BD FACSLyric Clinical", + "BD FACSMelody", + "BD FACSymphony S6", + "BD LSRFortessa", + "Chromium Controller", + "Chromium GEM-X Single Cell 3' Chip v4", + "Chromium Next GEM Chip G", + "Chromium Next GEM Chip H", + "Chromium Next GEM Chip K", + "Chromium Next GEM Chip M", + "Chromium Next GEM Chip Q", + "Chromium X", + "Chromium Xo", + "Chromium iX", + "CyTOF XT", + "Cytek Aurora", + "Cytek Aurora Evo", + "Fluidigm BioMark", + "GEM-X Flex Gene Expression Chip", + "GEM-X OCM 5' Chip", + "Helios Mass Cytometer", + "Hyperion", + "Illumina HiSeq 2500", + "Illumina HiSeq X Ten", + "Illumina NextSeq 500", + "Illumina NovaSeq 6000", + "Illumina NovaSeq X", + "Not Applicable", + "Olink Signature Q100", + "Sony MA900", + "Thermo Fisher Attune CytPix", + "Thermo Fisher Attune NxT", + "Thermo Fisher Attune Xenith", + "Visium CytAssist", + "Xenium", + "none", + "unknown" + ], + "type": "string" + }, + "title": "platform", + "type": "array" + }, + "SampleProcessingBatch": { + "description": "A label indicating batching of sample processing or preparation that occurs prior to data collection.", + "title": "sampleProcessingBatch", + "type": "string" + }, + "SoftwareAndVersion": { + "description": "Relevant software and version used to generate the data file.", + "enum": [ + "BD FACSDiva 8.0.1", + "Cell Ranger 9.0.1", + "Cell Ranger ATAC v1.1.0", + "Cell Ranger v3.0.0", + "Cell Ranger v3.0.1", + "Cell Ranger v3.0.2", + "Cell 
Ranger v3.1.0", + "Cell Ranger v4.0.0", + "Cell Ranger v5.0.0", + "Cell Ranger v5.0.1", + "Cell Ranger v6.0.0", + "Cell Ranger v6.0.1", + "Cell Ranger v6.0.2", + "Cell Ranger v6.1.0", + "Cell Ranger v6.1.1", + "Cell Ranger v6.1.2", + "Cell Ranger v7.0.0", + "Cell Ranger v7.0.1", + "Cell Ranger v7.1.0", + "Cell Ranger v7.2.0", + "Cell Ranger v8.0.0", + "Cell Ranger v8.0.1", + "Cell Ranger v9.0.0", + "Space Ranger 3.0.0", + "Space Ranger 3.0.1", + "Space Ranger 3.1.0", + "Space Ranger 3.1.1", + "Space Ranger 3.1.2", + "Space Ranger 3.1.3", + "demuxlet" + ], + "title": "softwareAndVersion" + }, + "SpecimenModality": { + "description": "Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens", + "enum": [ + "multispecimen", + "single specimen", + "unknown" + ], + "title": "specimenModality" + }, + "TotalReads": { + "description": "Total number of reads sequenced from the library.", + "title": "totalReads", + "type": "integer" + } + }, + "required": [ + "AlignmentReference", + "Assay", + "Component", + "LibraryPrepMethod", + "NucleicAcidSource", + "Platform", + "SpecimenModality", + "TotalReads" + ], + "title": "BulkRNA-seqAssayMetadataTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.ClinicalMetadataTemplate.schema.json b/model_json_schema/ark.ClinicalMetadataTemplate.schema.json index 657f453e..7fe3c505 100644 --- a/model_json_schema/ark.ClinicalMetadataTemplate.schema.json +++ b/model_json_schema/ark.ClinicalMetadataTemplate.schema.json @@ -1,242 +1,47 @@ { + "$id": "http://example.com/ClinicalMetadataTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", - "properties": { - "heightUnits": { - "enum": [ - "centimeters", - "feet", - "inches", - "meters" - ] - }, - "comorbidities": { - "type": "array", - "items": { - "enum": [ - "autoimmune thyroid disease", - "multiple 
sclerosis", - "psoriasis", - "systemic lupus erythematosus", - "inflammatory bowel disease", - "psoriatic arthritis", - "diabetes", - "rheumatoid arthritis", - "Hashimoto's Thyroiditis", - "pulmonary disease", - "other", - "cardiovascular disease", - "" - ] - }, - "maxItems": 12 - }, - "age": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "ethnicity": { - "enum": [ - "Hispanic or Latino", - "unknown", - "Not Hispanic or Latino", - "" - ] - }, - "ageUnits": { - "enum": [ - "years", - "months" - ] - }, - "weight": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "program": { - "enum": [ - "AMP AIM", - "AMP RA/SLE", - "Community Contribution" - ] - }, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "species": { - "enum": [ - "Homo sapiens" - ] - }, - "individualID": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "sex": { - "enum": [ - "intersex", - "unknown", - "female", - "male" - ] - }, - "race": { - "enum": [ - "White", - "Hispanic", - "Mixed Race", - "American Indian or Alaska Native", - "Asian", - "unknown", - "other", - "Native Hawaiian or Other Pacific Islander", - "Black or African American", - "" - ] - }, - "weightUnits": { - "enum": [ - "g", - "kg", - "oz", - "lb" - ] - }, - "project": { - "type": "array", - "items": { - "enum": [ - "AIM for RA", - "STAMP", - "LOCKIT", - "SLE", - "UMass V-CoRT", - "ELLIPSS", - "RA" - ] - }, - "maxItems": 7 - }, - "height": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "diagnosis": { - "type": "array", - "items": { - "enum": [ - "dermatomyositis", - "Sjogren's disease", - "cutaneous lupus erythematosus", - "psoriasis", - "vitiligo", - "scleroderma", - "control", - "At-Risk RA", - "OA", - "psoriatic arthritis", - "unknown", - "lupus nephritis", - "SLE", - "discoid lupus erythematosus", - "Not Applicable", - "RA" - ] - }, - "maxItems": 16 - }, - "PASI": {}, - "diabetesType": { - "enum": [ - "gestational", - "unknown", - "type 1", - "type 2", - "" - ] - }, 
- "visitID": {}, - "CDASI": {}, - "VASI": {}, - "VETI": {}, - "vitiligoPattern": { - "enum": [ - "mixed", - "unclassified", - "non-segmental", - "segmental", - "" - ] - }, - "VIDA": {} - }, - "required": [ - "heightUnits", - "age", - "ageUnits", - "weight", - "program", - "Component", - "species", - "individualID", - "sex", - "weightUnits", - "project", - "height", - "diagnosis" - ], "allOf": [ { "if": { "properties": { - "diagnosis": { + "Comorbidities": { "enum": [ - "psoriasis" + "Diabetes" ] } - }, - "required": [ - "diagnosis" - ] + } }, "then": { "properties": { - "PASI": {} + "DiabetesType": { + "not": { + "type": "null" + } + } }, "required": [ - "PASI" + "DiabetesType" ] } }, { "if": { "properties": { - "diagnosis": { + "Comorbidities": { "enum": [ - "psoriasis" + "Psoriasis" ] } - }, - "required": [ - "diagnosis" - ] + } }, "then": { "properties": { - "PASI": {} + "PASI": { + "not": { + "type": "null" + } + } }, "required": [ "PASI" @@ -246,19 +51,20 @@ { "if": { "properties": { - "diagnosis": { + "Diagnosis": { "enum": [ - "psoriasis" + "Psoriasis" ] } - }, - "required": [ - "diagnosis" - ] + } }, "then": { "properties": { - "PASI": {} + "PASI": { + "not": { + "type": "null" + } + } }, "required": [ "PASI" @@ -268,19 +74,20 @@ { "if": { "properties": { - "diagnosis": { + "Comorbidities": { "enum": [ - "psoriasis" + "Psoriasis" ] } - }, - "required": [ - "diagnosis" - ] + } }, "then": { "properties": { - "PASI": {} + "PASI": { + "not": { + "type": "null" + } + } }, "required": [ "PASI" @@ -290,204 +97,420 @@ { "if": { "properties": { - "comorbidities": { + "Diagnosis": { "enum": [ - "diabetes" + "Psoriasis" ] } - }, - "required": [ - "comorbidities" - ] + } }, "then": { "properties": { - "diabetesType": { - "enum": [ - "gestational", - "unknown", - "type 1", - "type 2", - "" - ] + "PASI": { + "not": { + "type": "null" + } } }, "required": [ - "diabetesType" + "PASI" ] } }, { "if": { "properties": { - "program": { + "Diagnosis": { "enum": [ - "AMP AIM" + 
"Dermatomyositis" ] } - }, - "required": [ - "program" - ] + } }, "then": { "properties": { - "visitID": { + "CDASI": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "visitID" + "CDASI" ] } }, { "if": { "properties": { - "program": { + "Diagnosis": { "enum": [ - "AMP RA/SLE" + "Vitiligo" ] } - }, - "required": [ - "program" - ] + } }, "then": { "properties": { - "visitID": { + "VASI": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "visitID" + "VASI" ] } }, { "if": { "properties": { - "diagnosis": { + "Diagnosis": { "enum": [ - "dermatomyositis" + "Vitiligo" ] } - }, - "required": [ - "diagnosis" - ] + } }, "then": { "properties": { - "CDASI": {} + "VETI": { + "not": { + "type": "null" + } + } }, "required": [ - "CDASI" + "VETI" ] } }, { "if": { "properties": { - "diagnosis": { + "Diagnosis": { "enum": [ - "vitiligo" + "Vitiligo" ] } - }, - "required": [ - "diagnosis" - ] + } }, "then": { "properties": { - "VASI": {} + "VIDA": { + "not": { + "type": "null" + } + } }, "required": [ - "VASI" + "VIDA" ] } }, { "if": { "properties": { - "diagnosis": { + "Diagnosis": { "enum": [ - "vitiligo" + "Vitiligo" ] } - }, - "required": [ - "diagnosis" - ] + } }, "then": { "properties": { - "VETI": {} + "VitiligoPattern": { + "not": { + "type": "null" + } + } }, "required": [ - "VETI" + "VitiligoPattern" ] } }, { "if": { "properties": { - "diagnosis": { + "Program": { "enum": [ - "vitiligo" + "AMPAIM" ] } - }, - "required": [ - "diagnosis" - ] + } }, "then": { "properties": { - "vitiligoPattern": { - "enum": [ - "mixed", - "unclassified", - "non-segmental", - "segmental", - "" - ] + "VisitID": { + "not": { + "type": "null" + } } }, "required": [ - "vitiligoPattern" + "VisitID" ] } }, { "if": { "properties": { - "diagnosis": { + "Program": { "enum": [ - "vitiligo" + "AMPRA/SLE" ] } - }, - "required": [ - "diagnosis" - ] + } }, "then": { "properties": { - "VIDA": {} + "VisitID": { + "not": { + "type": "null" + } + } }, "required": [ 
- "VIDA" + "VisitID" ] } } - ] + ], + "description": "A template outlining clinical metadata to collect for study subjects.", + "properties": { + "Age": { + "description": "Age at which subject was enrolled in study or age at corresponding visit and data collection event. If value unknown, enter '-1'.", + "title": "age", + "type": "number" + }, + "AgeUnits": { + "description": "The unit of measure used for `ageEnrollment` and `ageDiagnosis`", + "enum": [ + "months", + "years" + ], + "title": "ageUnits" + }, + "CDASI": { + "description": "Cutaneous Dermatomyositis Disease Area and Severity Index, is a measurement used to characterize cutaneous dermatomyositis severity.", + "title": "CDASI", + "type": "string" + }, + "Comorbidities": { + "description": "Any diseases or medical condition that is simultaneously present in addition to `diagnosis`.", + "items": { + "enum": [ + "Hashimoto's Thyroiditis", + "autoimmune thyroid disease", + "cardiovascular disease", + "diabetes", + "inflammatory bowel disease", + "multiple sclerosis", + "other", + "psoriasis", + "psoriatic arthritis", + "pulmonary disease", + "rheumatoid arthritis", + "systemic lupus erythematosus" + ], + "type": "string" + }, + "title": "comorbidities", + "type": "array" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "DiabetesType": { + "description": "Type of diabetes mellitus.", + "enum": [ + "gestational", + "type 1", + "type 2", + "unknown" + ], + "title": "diabetesType" + }, + "Diagnosis": { + "description": "A high-level classifier indicating the disease status of an individual.", + "items": { + "enum": [ + "At-Risk RA", + "Not Applicable", + "OA", + "RA", + "SLE", + "Sjogren's disease", + "control", + "cutaneous lupus erythematosus", + "dermatomyositis", + "discoid lupus erythematosus", + "lupus nephritis", + "psoriasis", + "psoriatic arthritis", + "scleroderma", + "unknown", + "vitiligo" + ], 
+ "type": "string" + }, + "title": "diagnosis", + "type": "array" + }, + "Ethnicity": { + "description": "The ethnicity of a person.", + "enum": [ + "Hispanic or Latino", + "Not Hispanic or Latino", + "unknown" + ], + "title": "ethnicity" + }, + "Height": { + "description": "Standing height of subject.", + "title": "height", + "type": "number" + }, + "HeightUnits": { + "description": "Unit of measure of value provided for `height`.", + "enum": [ + "centimeters", + "feet", + "inches", + "meters" + ], + "title": "heightUnits" + }, + "IndividualID": { + "description": "Unique identifier assigned to each study participant. For multi-specimen data files provide all IDs in a comma-separated list.", + "title": "individualID", + "type": "string" + }, + "PASI": { + "description": "Psoriasis Area and Severity Index, is a measurement of the discoloration, thickness, scaling, and coverage of psoriasis plaques.", + "title": "PASI", + "type": "string" + }, + "Program": { + "description": "Name of the funding program that supported the generation of data and associated files", + "enum": [ + "AMP AIM", + "AMP RA/SLE", + "Community Contribution" + ], + "title": "program" + }, + "Project": { + "description": "A sub-level attribute of `program` specifying a research initiative working to investigate particular hypotheses.", + "enum": [ + "AIM for RA", + "ELLIPSS", + "LOCKIT", + "RA", + "SLE", + "STAMP", + "UMass V-CoRT" + ], + "title": "project" + }, + "Race": { + "description": "A textual description of a person's race.", + "enum": [ + "American Indian or Alaska Native", + "Asian", + "Black or African American", + "Hispanic", + "Mixed Race", + "Native Hawaiian or Other Pacific Islander", + "White", + "other", + "unknown" + ], + "title": "race" + }, + "Sex": { + "description": "A textual description of a person's sex at birth.", + "enum": [ + "female", + "intersex", + "male", + "unknown" + ], + "title": "sex" + }, + "Species": { + "description": "The genus species of sample or 
subject origin.", + "enum": [ + "Homo sapiens" + ], + "title": "species" + }, + "VASI": { + "description": "Total body Vitiligo Area Severity Index, a measure of the percentage of vitiligo involvement across the body calculated in terms of hand units.", + "title": "VASI", + "type": "string" + }, + "VETI": { + "description": "Vitiligo Extent Tensity Index measures the extent of vitiligo by a numerical score and combines analysis of extensity and severity of vitiligo to produce a constant and reproducible number.", + "title": "VETI", + "type": "number" + }, + "VIDA": { + "description": "Vitiligo Disease Activity Score, a six-point scale that evaluates the activity of vitiligo", + "title": "VIDA", + "type": "string" + }, + "VisitID": { + "description": "Ordinal ID distinguishing different patient visits.", + "title": "visitID", + "type": "string" + }, + "VitiligoPattern": { + "description": "A high-level classification of vitiligo based on the distribution and patterning of lesions across the body.", + "enum": [ + "mixed", + "non-segmental", + "segmental", + "unclassified" + ], + "title": "vitiligoPattern" + }, + "Weight": { + "description": "Weight of subject. 
If value unknown, enter '-1'.", + "title": "weight", + "type": "number" + }, + "WeightUnits": { + "description": "Abbreviated unit of measure of value provided for `weight`.", + "enum": [ + "g", + "kg", + "lb", + "oz" + ], + "title": "weightUnits" + } + }, + "required": [ + "Age", + "AgeUnits", + "Component", + "Diagnosis", + "Height", + "HeightUnits", + "IndividualID", + "Program", + "Project", + "Sex", + "Species", + "Weight", + "WeightUnits" + ], + "title": "ClinicalMetadataTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.CyTOFAssayMetadataTemplate.schema.json b/model_json_schema/ark.CyTOFAssayMetadataTemplate.schema.json index 208a9c2f..e303ae7a 100644 --- a/model_json_schema/ark.CyTOFAssayMetadataTemplate.schema.json +++ b/model_json_schema/ark.CyTOFAssayMetadataTemplate.schema.json @@ -1,171 +1,189 @@ { + "$id": "http://example.com/CyTOFAssayMetadataTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", + "description": "A template outlining assay-related metadata for a cytometry time-of-flight (CyTOF) dataset. 
Each row corresponds to a biospecimen profiled in the experiment.", "properties": { - "softwareAndVersion": { - "enum": [ - "Cell Ranger v6.0.2", - "Cell Ranger v3.0.0", - "Cell Ranger 9.0.1", - "Space Ranger 3.1.0", - "Cell Ranger ATAC v1.1.0", - "Cell Ranger v4.0.0", - "Cell Ranger v3.0.2", - "Cell Ranger v9.0.0", - "Space Ranger 3.1.1", - "Cell Ranger v6.0.0", - "Cell Ranger v7.0.0", - "Cell Ranger v5.0.1", - "Cell Ranger v7.2.0", - "Cell Ranger v3.0.1", - "Cell Ranger v5.0.0", - "BD FACSDiva 8.0.1", - "Space Ranger 3.1.2", - "demuxlet", - "Cell Ranger v3.1.0", - "Cell Ranger v6.1.0", - "Space Ranger 3.0.1", - "Cell Ranger v8.0.1", - "Cell Ranger v6.0.1", - "Cell Ranger v7.1.0", - "Space Ranger 3.0.0", - "Cell Ranger v7.0.1", - "Cell Ranger v6.1.1", - "Cell Ranger v6.1.2", - "Space Ranger 3.1.3", - "Cell Ranger v8.0.0", - "" - ] - }, - "dataCollectionBatch": {}, - "Component": { - "not": { - "type": "null" + "Assay": { + "description": "The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. 
e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.", + "items": { + "enum": [ + "ASAPSeq", + "CE-MS", + "CITESeq", + "CosMX", + "CyTOF", + "GenePS SeqFISH", + "H&E", + "LC-MS/MS", + "NULISA", + "Olink Explore HT", + "Olink Flex", + "Olink Focus", + "Olink Reveal", + "Olink Target 48", + "Olink Target 96", + "RNASeq", + "SNP array", + "SomaScan", + "VDJSeq", + "Visium", + "WES", + "WGS", + "Xenium", + "feature barcode sequencing", + "flow cytometry", + "imaging mass cytometry", + "imaging mass spectrometry", + "kiloplex", + "multiplexed ELISA", + "scRNASeq", + "scVDJSeq", + "serial IHC", + "snATACSeq", + "snRNASeq" + ], + "type": "string" }, - "minLength": 1 + "title": "assay", + "type": "array" }, - "targetPanelSynID": { - "not": { - "type": "null" + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.", + "items": { + "type": "string" }, - "minLength": 1 + "title": "biospecimenID", + "type": "array" }, - "biospecimenID": { - "not": { - "type": "null" - }, - "minLength": 1 + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" }, - "sampleProcessingBatch": {}, - "platform": { - "type": "array", + "DataCollectionBatch": { + "description": "A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.", + "title": "dataCollectionBatch", + "type": "string" + }, + "Platform": { + "description": "The specific version (manufacturer, model, etc.) of a technology that is used to carry out a laboratory or computational experiment. Specify where applicable for experimental data files, else enter 'none'. 
In most cases only a single label is expected, however multiple selections can be provided in comma-delimited list where applicable e.g., for 10x Genomics fastq files please specify both the 10x instrument and the sequencing platform.", "items": { "enum": [ "BD FACSAria Fusion cell sorter", - "unknown", - "GEM-X Flex Gene Expression Chip", + "BD FACSAria III", "BD FACSCanto", - "BD FACSMelody", + "BD FACSCanto II", + "BD FACSDiscover A8", + "BD FACSDiscover S8", "BD FACSLyric Clinical", - "Illumina HiSeq X Ten", - "Illumina NextSeq 500", - "Cytek Aurora Evo", - "Chromium Next GEM Chip Q", - "Helios Mass Cytometer", + "BD FACSMelody", "BD FACSymphony S6", + "BD LSRFortessa", "Chromium Controller", + "Chromium GEM-X Single Cell 3' Chip v4", + "Chromium Next GEM Chip G", + "Chromium Next GEM Chip H", "Chromium Next GEM Chip K", - "Fluidigm BioMark", - "GEM-X OCM 5' Chip", + "Chromium Next GEM Chip M", + "Chromium Next GEM Chip Q", + "Chromium X", + "Chromium Xo", + "Chromium iX", "CyTOF XT", "Cytek Aurora", - "Xenium", - "Visium CytAssist", - "Thermo Fisher Attune Xenith", - "BD FACSAria III", - "Chromium X", - "Chromium Next GEM Chip H", + "Cytek Aurora Evo", + "Fluidigm BioMark", + "GEM-X Flex Gene Expression Chip", + "GEM-X OCM 5' Chip", + "Helios Mass Cytometer", + "Hyperion", + "Illumina HiSeq 2500", + "Illumina HiSeq X Ten", + "Illumina NextSeq 500", + "Illumina NovaSeq 6000", "Illumina NovaSeq X", - "Sony MA900", - "Chromium Next GEM Chip G", + "Not Applicable", "Olink Signature Q100", - "BD LSRFortessa", - "BD FACSDiscover S8", - "Chromium Xo", + "Sony MA900", + "Thermo Fisher Attune CytPix", "Thermo Fisher Attune NxT", - "BD FACSCanto II", + "Thermo Fisher Attune Xenith", + "Visium CytAssist", + "Xenium", "none", - "Not Applicable", - "Chromium GEM-X Single Cell 3' Chip v4", - "Illumina HiSeq 2500", - "Chromium Next GEM Chip M", - "BD FACSDiscover A8", - "Hyperion", - "Thermo Fisher Attune CytPix", - "Illumina NovaSeq 6000", - "Chromium iX" - ] + "unknown" + 
], + "type": "string" }, - "maxItems": 43 + "title": "platform", + "type": "array" }, - "targetPanelSize": {}, - "assay": { - "type": "array", - "items": { - "enum": [ - "H&E", - "VDJSeq", - "imaging mass spectrometry", - "RNASeq", - "Olink Explore HT", - "Olink Target 96", - "Olink Flex", - "kiloplex", - "imaging mass cytometry", - "CosMX", - "snRNASeq", - "CE-MS", - "WES", - "SomaScan", - "Xenium", - "ASAPSeq", - "Olink Target 48", - "scVDJSeq", - "multiplexed ELISA", - "scRNASeq", - "snATACSeq", - "WGS", - "flow cytometry", - "CITESeq", - "NULISA", - "LC-MS/MS", - "Olink Focus", - "GenePS SeqFISH", - "Visium", - "serial IHC", - "Olink Reveal", - "feature barcode sequencing", - "SNP array", - "CyTOF" - ] - }, - "maxItems": 34 + "SampleProcessingBatch": { + "description": "A label indicating batching of sample processing or preparation that occurs prior to data collection.", + "title": "sampleProcessingBatch", + "type": "string" }, - "targetPanel": { - "not": { - "type": "null" - }, - "minLength": 1 + "SoftwareAndVersion": { + "description": "Relevant software and version used to generate the data file.", + "enum": [ + "BD FACSDiva 8.0.1", + "Cell Ranger 9.0.1", + "Cell Ranger ATAC v1.1.0", + "Cell Ranger v3.0.0", + "Cell Ranger v3.0.1", + "Cell Ranger v3.0.2", + "Cell Ranger v3.1.0", + "Cell Ranger v4.0.0", + "Cell Ranger v5.0.0", + "Cell Ranger v5.0.1", + "Cell Ranger v6.0.0", + "Cell Ranger v6.0.1", + "Cell Ranger v6.0.2", + "Cell Ranger v6.1.0", + "Cell Ranger v6.1.1", + "Cell Ranger v6.1.2", + "Cell Ranger v7.0.0", + "Cell Ranger v7.0.1", + "Cell Ranger v7.1.0", + "Cell Ranger v7.2.0", + "Cell Ranger v8.0.0", + "Cell Ranger v8.0.1", + "Cell Ranger v9.0.0", + "Space Ranger 3.0.0", + "Space Ranger 3.0.1", + "Space Ranger 3.1.0", + "Space Ranger 3.1.1", + "Space Ranger 3.1.2", + "Space Ranger 3.1.3", + "demuxlet" + ], + "title": "softwareAndVersion" + }, + "TargetPanel": { + "description": "A unique or established human-readable name assigned to the panel of 
targets profiled in the experiment. For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.", + "title": "targetPanel", + "type": "string" + }, + "TargetPanelSize": { + "description": "The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).", + "title": "targetPanelSize", + "type": "integer" + }, + "TargetPanelSynID": { + "description": "In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. This attribute links experimental data files to the target panel metadata via the synapse ID of that file.", + "pattern": "^syn[0-9]{8}", + "title": "targetPanelSynID", + "type": "string" } }, "required": [ + "Assay", + "BiospecimenID", "Component", - "targetPanelSynID", - "biospecimenID", - "platform", - "assay", - "targetPanel" - ] + "Platform", + "TargetPanel", + "TargetPanelSynID" + ], + "title": "CyTOFAssayMetadataTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.DatasetAnnotationTemplate.schema.json b/model_json_schema/ark.DatasetAnnotationTemplate.schema.json index 12fa0e98..e9da8a94 100644 --- a/model_json_schema/ark.DatasetAnnotationTemplate.schema.json +++ b/model_json_schema/ark.DatasetAnnotationTemplate.schema.json @@ -1,299 +1,348 @@ { + "$id": "http://example.com/DatasetAnnotationTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", + "allOf": [ + { + "if": { + "properties": { + "Program": { + "enum": [ + "AMPRA/SLE" + ] + } + } + }, + "then": { + "properties": 
{ + "ProgramPhase": { + "not": { + "type": "null" + } + } + }, + "required": [ + "ProgramPhase" + ] + } + } + ], + "description": "A template outlining dataset metadata to use as annotations for a synapse dataset entity.", "properties": { - "acknowledgmentStatement": { + "ARKRelease": { + "description": "A Dataset-specific attribute specifying the ARK Portal release in which this dataset was first made available to the public.", "enum": [ - "syn26710600/wiki/619685" - ] - }, - "diagnosis": { - "type": "array", - "items": { - "enum": [ - "dermatomyositis", - "lupus nephritis", - "psoriatic arthritis", - "control", - "Sjogren's disease", - "unknown", - "cutaneous lupus erythematosus", - "vitiligo", - "SLE", - "scleroderma", - "At-Risk RA", - "psoriasis", - "OA", - "discoid lupus erythematosus", - "RA", - "Not Applicable" - ] - }, - "maxItems": 16 + "1.0", + "2.0", + "2024.06.R1", + "2024.07.R1", + "2024.08.R1", + "2024.09.R1", + "2024.10.R1", + "2024.12.R1", + "2025.01.R1", + "2025.02.R1", + "2025.03.R1", + "2025.04.R1", + "2025.05.R1", + "2025.06.R1", + "2025.07.R1", + "2025.08.R1", + "2025.09.R1", + "2025.10.R1", + "2025.11.R1", + "2025.12.R1" + ], + "title": "ARKRelease" }, - "species": { + "AcknowledgmentStatement": { + "description": "A Dataset-specific attribute specifying the path to the wiki subpage within the ARK Portal - backend project that contains the acknowledgement statement that must be included in publications using data from the given dataset as a stipulation of the conditions of use.", "enum": [ - "Homo sapiens" - ] - }, - "associatedCodeURL": {}, - "associatedDataset": {}, - "dataSubtype": { - "type": "array", - "items": { - "enum": [ - "single-cell", - "pseudobulk", - "single-nucleus", - "none", - "bulk", - "spatial" - ] - }, - "maxItems": 6 - }, - "project": { - "type": "array", - "items": { - "enum": [ - "AIM for RA", - "ELLIPSS", - "LOCKIT", - "STAMP", - "SLE", - "UMass V-CoRT", - "RA" - ] - }, - "maxItems": 7 - }, - "dataType": { - "type": 
"array", - "items": { - "enum": [ - "immunostaining", - "microbiome", - "immune repertoire profiling", - "metabolomics", - "multimodal", - "proteomics", - "transcriptomics", - "histology", - "epigenomics", - "cytometry", - "lipidomics", - "genomics" - ] - }, - "maxItems": 12 + "syn26710600/wiki/619685" + ], + "title": "acknowledgmentStatement" }, - "assay": { - "type": "array", + "Assay": { + "description": "The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.", "items": { "enum": [ - "Olink Target 48", + "ASAPSeq", + "CE-MS", "CITESeq", - "flow cytometry", + "CosMX", + "CyTOF", + "GenePS SeqFISH", + "H&E", "LC-MS/MS", - "multiplexed ELISA", - "VDJSeq", - "serial IHC", + "NULISA", "Olink Explore HT", - "Visium", - "GenePS SeqFISH", - "ASAPSeq", - "SomaScan", - "Xenium", "Olink Flex", - "Olink Target 96", "Olink Focus", - "CyTOF", - "NULISA", + "Olink Reveal", + "Olink Target 48", + "Olink Target 96", + "RNASeq", + "SNP array", + "SomaScan", + "VDJSeq", + "Visium", + "WES", + "WGS", + "Xenium", "feature barcode sequencing", + "flow cytometry", + "imaging mass cytometry", "imaging mass spectrometry", - "snRNASeq", - "CosMX", "kiloplex", - "RNASeq", + "multiplexed ELISA", "scRNASeq", - "WES", + "scVDJSeq", + "serial IHC", "snATACSeq", - "Olink Reveal", - "CE-MS", - "imaging mass cytometry", - "WGS", - "SNP array", - "H&E", - "scVDJSeq" - ] + "snRNASeq" + ], + "type": "string" + }, + "title": "assay", + "type": "array" + }, + "AssociatedAccession": { + "description": "This is a File and Dataset annotation attribute indicating additional accessions (i.e., unique identifiers) associated with the data when the data has also been submitted to or can be found in other repositories such as GEO, SRA, dbGaP, etc.", + "items": { + "type": "string" }, - "maxItems": 34 + "title": 
"associatedAccession", + "type": "array" + }, + "AssociatedCodeURL": { + "description": "A URL to the repository where associated code is available.", + "format": "uri", + "title": "associatedCodeURL", + "type": "string" }, - "ImmPortAccession": {}, - "biospecimenType": { - "type": "array", + "AssociatedDataset": { + "description": "The synID of a Dataset entity. This serves to link other Synapse entities to Dataset entities. When used to annotate a publication Dataset this attribute should include the synID for an experimental Datasets from which the publication data was derived. Multiple synID can be specified using a comma-delimited list.", + "items": { + "pattern": "^syn[0-9]{8}", + "type": "string" + }, + "pattern": "^syn[0-9]{8}", + "title": "associatedDataset", + "type": "array" + }, + "BiospecimenSubtype": { + "description": "Biospecimen status before sample is processed into a scRNA-seq library. Several scRNA-seq technologies support a variety of sample processing methods which can introduces sources of technical variation.", + "enum": [ + "FFPE tissue", + "PFA-fixed tissue", + "cell or tissue lysate", + "cell suspension", + "flow-sorted cells", + "fresh tissue", + "frozen tissue", + "nuclei suspension", + "supernatant" + ], + "title": "biospecimenSubtype" + }, + "BiospecimenType": { + "description": "A label indicating the biological material collected for experimentation and data collection. 
Where applicable, provide all types in a comma-separated list.", "items": { "enum": [ - "serum", - "suction blister fluid", "PBMCs", - "uvea", - "skin biopsy", - "plasma", + "cell line", "fibroblast-like synoviocyte", - "urine", - "whole blood", "kidney biopsy", - "synovial tissue", - "total leukocytes", - "stool", - "skin swab", - "saliva", "none", - "salivary gland", + "plasma", "primary cell culture", + "saliva", + "salivary gland", + "serum", + "skin biopsy", + "skin swab", + "stool", "suction blister cells", + "suction blister fluid", "synovial fluid", - "cell line" - ] + "synovial tissue", + "total leukocytes", + "urine", + "uvea", + "whole blood" + ], + "type": "string" }, - "maxItems": 21 - }, - "publicationSynID": {}, - "ARKRelease": { - "enum": [ - "2025.02.R1", - "2025.05.R1", - "2024.07.R1", - "2024.12.R1", - "2025.07.R1", - "2024.10.R1", - "1.0", - "2025.10.R1", - "2025.04.R1", - "2025.09.R1", - "2.0", - "2024.09.R1", - "2025.03.R1", - "2025.01.R1", - "2024.08.R1", - "2025.12.R1", - "2025.08.R1", - "2024.06.R1", - "2025.06.R1", - "2025.11.R1" - ] + "title": "biospecimenType", + "type": "array" }, - "datasetStatus": { - "enum": [ - "test", - "deprecated", - "unreleased", - "released", - "under peer review" - ] + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" }, - "datasetDescription": { - "not": { - "type": "null" + "DataSubtype": { + "description": "General classification to differentiate between omics profiling modalities. If N/A please select 'none'. 
Multiple selections can be provided in a comma-delimited list, however this is largely only expected in the context of Datasets and files that contain integrated experimental data spanning multiple types.", + "items": { + "enum": [ + "bulk", + "none", + "pseudobulk", + "single-cell", + "single-nucleus", + "spatial" + ], + "type": "string" }, - "minLength": 1 + "title": "dataSubtype", + "type": "array" }, - "associatedAccession": {}, - "Component": { - "not": { - "type": "null" + "DataType": { + "description": "High-level classification of the type of data contained in the file, loosely related to the experimental method or biological entity that is being profiled. Select all that apply using a comma-delimited list, though in most cases only a single label is expected. For multimodal datasets with concomitant profiling of biospecimen include 'multimodal'.", + "items": { + "enum": [ + "cytometry", + "epigenomics", + "genomics", + "histology", + "immune repertoire profiling", + "immunostaining", + "lipidomics", + "metabolomics", + "microbiome", + "multimodal", + "proteomics", + "transcriptomics" + ], + "type": "string" }, - "minLength": 1 + "title": "dataType", + "type": "array" }, - "datasetType": { + "DatasetDescription": { + "description": "A Dataset-specific attribute specifying the synID of the folder that contains a wiki write-up of the dataset description. This wiki content will be surfaced on the ARK Portal frontend site.", + "pattern": "^syn[0-9]{8}", + "title": "datasetDescription", + "type": "string" + }, + "DatasetStatus": { + "description": "A categorical label indicating the status of an ARK Portal dataset. 
This is applied to improve downstream management of datasets as well as various ETL workflows.", + "enum": [ + "deprecated", + "released", + "test", + "under peer review", + "unreleased" + ], + "title": "datasetStatus" + }, + "DatasetType": { + "description": "High-level classification of dataset entity distinguishing between datasets compiled for a specific publication or as a general data resource.", "enum": [ "experimental", "publication" - ] + ], + "title": "datasetType" }, - "program": { - "enum": [ - "Community Contribution", - "AMP AIM", - "AMP RA/SLE" - ] + "Diagnosis": { + "description": "A high-level classifier indicating the disease status of an individual.", + "items": { + "enum": [ + "At-Risk RA", + "Not Applicable", + "OA", + "RA", + "SLE", + "Sjogren's disease", + "control", + "cutaneous lupus erythematosus", + "dermatomyositis", + "discoid lupus erythematosus", + "lupus nephritis", + "psoriasis", + "psoriatic arthritis", + "scleroderma", + "unknown", + "vitiligo" + ], + "type": "string" + }, + "title": "diagnosis", + "type": "array" + }, + "ImmPortAccession": { + "description": "Accession to corresponding information in ImmPort.", + "pattern": "^SDY", + "title": "ImmPortAccession", + "type": "string" }, - "biospecimenSubtype": { + "Program": { + "description": "Name of the funding program that supported the generation of data and associated files", "enum": [ - "supernatant", - "frozen tissue", - "PFA-fixed tissue", - "FFPE tissue", - "flow-sorted cells", - "cell or tissue lysate", - "fresh tissue", - "nuclei suspension", - "cell suspension", - "" - ] + "AMP AIM", + "AMP RA/SLE", + "Community Contribution" + ], + "title": "program" }, - "programPhase": { - "type": "array", + "ProgramPhase": { + "description": "A label noting which AMP RA/SLE program phase generated the data.", "items": { "enum": [ "I", - "II", - "" - ] + "II" + ], + "type": "string" }, - "maxItems": 2 + "title": "programPhase", + "type": "array" + }, + "Project": { + "description": 
"A sub-level attribute of `program` specifying a research initiative working to investigate particular hypotheses.", + "items": { + "enum": [ + "AIM for RA", + "ELLIPSS", + "LOCKIT", + "RA", + "SLE", + "STAMP", + "UMass V-CoRT" + ], + "type": "string" + }, + "title": "project", + "type": "array" + }, + "PublicationSynID": { + "description": "The synID of the corresponding Synapse entity that stores metadata about the publication. This is used to differentiate publication-specific files, often consisting of level 4 processed data and expanded subject metadata, in a publication dataset that also includes raw or minimally processed files from experimental datasets. This provides an easy way to distinguish and select for the publication-specific data from which the research findings were derived. When this attribute is used to annotate a Dataset it serves as a way to directly link the Dataset entity with the publication metadata stored in Synapse.", + "pattern": "^syn[0-9]{8}", + "title": "publicationSynID", + "type": "string" + }, + "Species": { + "description": "The genus species of sample or subject origin.", + "enum": [ + "Homo sapiens" + ], + "title": "species" } }, "required": [ - "acknowledgmentStatement", - "diagnosis", - "species", - "dataSubtype", - "project", - "dataType", - "assay", - "biospecimenType", "ARKRelease", - "datasetStatus", - "datasetDescription", + "AcknowledgmentStatement", + "Assay", + "BiospecimenType", "Component", - "datasetType", - "program" + "DataSubtype", + "DataType", + "DatasetDescription", + "DatasetStatus", + "DatasetType", + "Diagnosis", + "Program", + "Project", + "Species" ], - "allOf": [ - { - "if": { - "properties": { - "program": { - "enum": [ - "AMP RA/SLE" - ] - } - }, - "required": [ - "program" - ] - }, - "then": { - "properties": { - "programPhase": { - "type": "array", - "items": { - "enum": [ - "I", - "II" - ] - }, - "maxItems": 2 - } - }, - "required": [ - "programPhase" - ] - } - } - ] + "title": 
"DatasetAnnotationTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.FCSFileAnnotationTemplate.schema.json b/model_json_schema/ark.FCSFileAnnotationTemplate.schema.json index 66351b4d..4b7cb8c4 100644 --- a/model_json_schema/ark.FCSFileAnnotationTemplate.schema.json +++ b/model_json_schema/ark.FCSFileAnnotationTemplate.schema.json @@ -1,143 +1,168 @@ { + "$id": "http://example.com/FCSFileAnnotationTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", - "properties": { - "fileFormat": { - "enum": [ - "fcs", - "" - ] - }, - "eventCount": {}, - "specimenModality": { - "enum": [ - "multispecimen", - "single specimen", - "unknown" - ] - }, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "assay": { - "enum": [ - "CyTOF", - "flow cytometry" - ] - }, - "dataCollectionBatch": {}, - "sampleProcessingBatch": {}, - "individualID": {}, - "biospecimenID": {} - }, - "required": [ - "specimenModality", - "Component", - "assay" - ], "allOf": [ { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "multispecimen" + "Multispecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "dataCollectionBatch": {} + "DataCollectionBatch": { + "not": { + "type": "null" + } + } }, "required": [ - "dataCollectionBatch" + "DataCollectionBatch" ] } }, { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "multispecimen" + "Multispecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "sampleProcessingBatch": {} + "SampleProcessingBatch": { + "not": { + "type": "null" + } + } }, "required": [ - "sampleProcessingBatch" + "SampleProcessingBatch" ] } }, { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "single specimen" + "Singlespecimen" ] } - }, - "required": [ - 
"specimenModality" - ] + } }, "then": { "properties": { - "individualID": { + "BiospecimenID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "individualID" + "BiospecimenID" ] } }, { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "single specimen" + "Singlespecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "biospecimenID": { + "IndividualID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "biospecimenID" + "IndividualID" ] } } - ] + ], + "description": "A template outlining metadata to be collected and applied to FCS file entities as annotations in Synapse.", + "properties": { + "Assay": { + "description": "The technology used to generate the data in this file.", + "enum": [ + "CyTOF", + "flow cytometry" + ], + "title": "assay" + }, + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.", + "items": { + "type": "string" + }, + "title": "biospecimenID", + "type": "array" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "DataCollectionBatch": { + "description": "A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.", + "title": "dataCollectionBatch", + "type": "string" + }, + "EventCount": { + "description": "The total number of events detected and captured in the corresponding FCS file. In cytometry, an event corresponds to particles detected and measured by the instrument. 
This number corresponds to the number of rows in the FCS file and is expected to be a whole integer number.", + "title": "eventCount", + "type": "integer" + }, + "FileFormat": { + "description": "Standard file format name or file extension", + "enum": [ + "fcs" + ], + "title": "fileFormat" + }, + "IndividualID": { + "description": "Unique identifier assigned to each study participant. For multi-specimen data files provide all IDs in a comma-separated list.", + "items": { + "type": "string" + }, + "title": "individualID", + "type": "array" + }, + "SampleProcessingBatch": { + "description": "A label indicating batching of sample processing or preparation that occurs prior to data collection.", + "title": "sampleProcessingBatch", + "type": "string" + }, + "SpecimenModality": { + "description": "Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens", + "enum": [ + "multispecimen", + "single specimen", + "unknown" + ], + "title": "specimenModality" + } + }, + "required": [ + "Assay", + "Component", + "SpecimenModality" + ], + "title": "FCSFileAnnotationTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.FastqFileAnnotationTemplate.schema.json b/model_json_schema/ark.FastqFileAnnotationTemplate.schema.json index 6c3e24cc..60c01a28 100644 --- a/model_json_schema/ark.FastqFileAnnotationTemplate.schema.json +++ b/model_json_schema/ark.FastqFileAnnotationTemplate.schema.json @@ -1,217 +1,239 @@ { + "$id": "http://example.com/FastqFileAnnotationTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", - "properties": { - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "fileFormat": { - "enum": [ - "fastq" - ] - }, - "specimenModality": { - "enum": [ - "multispecimen", - "unknown", - "single specimen" - ] - }, - "readLength": {}, - "assay": { - "type": 
"array", - "items": { - "enum": [ - "RNASeq", - "ASAPSeq", - "feature barcode sequencing", - "snRNASeq", - "scVDJSeq", - "WGS", - "VDJSeq", - "scRNASeq", - "snATACSeq", - "WES", - "CITESeq" - ] - }, - "maxItems": 11 - }, - "libraryID": {}, - "biospecimenID": {}, - "individualID": {}, - "targetPanel": {}, - "targetPanelSynID": {}, - "targetPanelSize": {} - }, - "required": [ - "Component", - "fileFormat", - "specimenModality", - "assay" - ], "allOf": [ { "if": { "properties": { - "specimenModality": { + "Assay": { "enum": [ - "multispecimen" + "Featurebarcodesequencing" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "libraryID": { + "TargetPanel": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "libraryID" + "TargetPanel" ] } }, { "if": { "properties": { - "specimenModality": { + "Assay": { "enum": [ - "single specimen" + "Featurebarcodesequencing" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "biospecimenID": { + "TargetPanelSize": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "biospecimenID" + "TargetPanelSize" ] } }, { "if": { "properties": { - "specimenModality": { + "Assay": { "enum": [ - "single specimen" + "Featurebarcodesequencing" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "individualID": { + "TargetPanelSynID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "individualID" + "TargetPanelSynID" ] } }, { "if": { "properties": { - "assay": { + "SpecimenModality": { "enum": [ - "feature barcode sequencing" + "Multispecimen" ] } - }, - "required": [ - "assay" - ] + } }, "then": { "properties": { - "targetPanel": { + "LibraryID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "targetPanel" + "LibraryID" ] } }, { "if": { "properties": { - "assay": { + "SpecimenModality": { "enum": [ - "feature barcode sequencing" + "Singlespecimen" ] } - }, - 
"required": [ - "assay" - ] + } }, "then": { "properties": { - "targetPanelSynID": { + "BiospecimenID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "targetPanelSynID" + "BiospecimenID" ] } }, { "if": { "properties": { - "assay": { + "SpecimenModality": { "enum": [ - "feature barcode sequencing" + "Singlespecimen" ] } - }, - "required": [ - "assay" - ] + } }, "then": { "properties": { - "targetPanelSize": {} + "IndividualID": { + "not": { + "type": "null" + } + } }, "required": [ - "targetPanelSize" + "IndividualID" ] } } - ] + ], + "description": "A template outlining metadata to be collected and applied to fastq file entities as annotations in Synapse.", + "properties": { + "Assay": { + "description": "The technology used to generate the data in this file. Select all assays that apply. e.g., the GEX fastq files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.", + "items": { + "enum": [ + "ASAPSeq", + "CITESeq", + "RNASeq", + "VDJSeq", + "WES", + "WGS", + "feature barcode sequencing", + "scRNASeq", + "scVDJSeq", + "snATACSeq", + "snRNASeq" + ], + "type": "string" + }, + "title": "assay", + "type": "array" + }, + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.", + "items": { + "type": "string" + }, + "title": "biospecimenID", + "type": "array" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "FileFormat": { + "description": "Standard file format name or file extension", + "enum": [ + "fastq" + ], + "title": "fileFormat" + }, + "IndividualID": { + "description": "Unique identifier assigned to each study participant. 
For multi-specimen data files provide all IDs in a comma-separated list.", + "items": { + "type": "string" + }, + "title": "individualID", + "type": "array" + }, + "LibraryID": { + "description": "A library label or name, unique within an experiment, used to distinguish sequencing libraries.", + "title": "libraryID", + "type": "string" + }, + "ReadLength": { + "description": "The number of base pairs (bp) sequenced for reads in a fastq file.", + "title": "readLength", + "type": "integer" + }, + "SpecimenModality": { + "description": "Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens", + "enum": [ + "multispecimen", + "single specimen", + "unknown" + ], + "title": "specimenModality" + }, + "TargetPanel": { + "description": "A unique or established human-readable name assigned to the panel of targets profiled in the experiment. For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.", + "title": "targetPanel", + "type": "string" + }, + "TargetPanelSize": { + "description": "The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).", + "title": "targetPanelSize", + "type": "integer" + }, + "TargetPanelSynID": { + "description": "In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. 
This attribute links experimental data files to the target panel metadata via the synapse ID of that file.", + "pattern": "^syn[0-9]{8}", + "title": "targetPanelSynID", + "type": "string" + } + }, + "required": [ + "Assay", + "Component", + "FileFormat", + "SpecimenModality" + ], + "title": "FastqFileAnnotationTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.InVitroBiospecimenMetadataTemplate.schema.json b/model_json_schema/ark.InVitroBiospecimenMetadataTemplate.schema.json index 0fe1770c..4c840532 100644 --- a/model_json_schema/ark.InVitroBiospecimenMetadataTemplate.schema.json +++ b/model_json_schema/ark.InVitroBiospecimenMetadataTemplate.schema.json @@ -1,1194 +1,1091 @@ { + "$id": "http://example.com/InVitroBiospecimenMetadataTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", - "properties": { - "program": { - "enum": [ - "AMP RA/SLE", - "AMP AIM", - "Community Contribution" - ] - }, - "biospecimenID": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "notes": {}, - "altSampleID": {}, - "biospecimenType": { - "enum": [ - "fibroblast-like synoviocyte", - "primary cell culture", - "suction blister cells", - "synovial fluid", - "urine", - "salivary gland", - "none", - "skin swab", - "saliva", - "serum", - "PBMCs", - "stool", - "kidney biopsy", - "total leukocytes", - "whole blood", - "cell line", - "skin biopsy", - "synovial tissue", - "plasma", - "suction blister fluid", - "uvea" - ] - }, - "individualID": {}, - "parentBiospecimenID": {}, - "biospecimenSubtype": { - "enum": [ - "FFPE tissue", - "flow-sorted cells", - "cell suspension", - "frozen tissue", - "fresh tissue", - "nuclei suspension", - "supernatant", - "PFA-fixed tissue", - "cell or tissue lysate", - "" - ] - }, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "project": { - "type": "array", - "items": { - "enum": [ - "LOCKIT", - "RA", - 
"ELLIPSS", - "AIM for RA", - "SLE", - "STAMP", - "UMass V-CoRT" - ] - }, - "maxItems": 7 - }, - "treatmentTimepoint": {}, - "sampleCollectionBatch": {}, - "treatment": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "visitID": {}, - "primaryCellSource": { - "enum": [ - "synovial tissue", - "pannus-derived epidermis", - "PBMCs", - "pannus-derived dermis", - "total leukocytes", - "urine", - "whole blood", - "salivary gland", - "uvea", - "kidney", - "" - ] - }, - "cellType": {}, - "cellOntologyID": {}, - "anatomicalSite": { - "enum": [ - "right knee joint", - "left wrist joint", - "left knee joint", - "left ankle joint", - "right 2nd MTP joint", - "right wrist joint", - "left hip joint", - "unknown", - "left 2nd MCP joint", - "right 2nd MCP joint", - "other site", - "right ankle joint", - "right hip joint", - "right 3rd MCP joint", - "right 1st MTP joint", - "" - ] - }, - "skinSiteStatus": { - "enum": [ - "non-lesional", - "lesional", - "healthy control", - "lesional proximal", - "" - ] - }, - "synovialCollectionProcedure": { - "enum": [ - "synovectomy", - "biopsy", - "unknown", - "arthroplasty", - "" - ] - }, - "salivaCollectionProcedure": { - "enum": [ - "stimulated", - "unstimulated", - "" - ] - }, - "krennLining": {}, - "krennInflammatory": {}, - "krennSynovitisScore": {}, - "krennStroma": {}, - "FACSPopulation": {}, - "userDefinedCellType": {} - }, - "required": [ - "program", - "biospecimenID", - "biospecimenType", - "Component", - "project", - "treatment" - ], "allOf": [ { "if": { "properties": { - "program": { + "BiospecimenSubtype": { "enum": [ - "AMP RA/SLE" + "Cellsuspension" ] } - }, - "required": [ - "program" - ] + } }, "then": { "properties": { - "visitID": { + "CellOntologyID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "visitID" + "CellOntologyID" ] } }, { "if": { "properties": { - "program": { + "BiospecimenSubtype": { "enum": [ - "AMP AIM" + "Flow-sortedcells" ] } - }, - "required": [ - "program" - ] + } }, 
"then": { "properties": { - "visitID": { + "CellOntologyID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "visitID" + "CellOntologyID" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "primary cell culture" + "Cellline" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "primaryCellSource": { - "enum": [ - "synovial tissue", - "pannus-derived epidermis", - "PBMCs", - "pannus-derived dermis", - "total leukocytes", - "urine", - "whole blood", - "salivary gland", - "uvea", - "kidney" - ] + "CellOntologyID": { + "not": { + "type": "null" + } } }, "required": [ - "primaryCellSource" + "CellOntologyID" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "primary cell culture" + "Primarycellculture" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "cellType": {} + "CellOntologyID": { + "not": { + "type": "null" + } + } }, "required": [ - "cellType" + "CellOntologyID" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenSubtype": { "enum": [ - "cell line" + "Cellsuspension" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "cellType": {} + "CellType": { + "not": { + "type": "null" + } + } }, "required": [ - "cellType" + "CellType" ] } }, { "if": { "properties": { - "biospecimenSubtype": { + "BiospecimenSubtype": { "enum": [ - "flow-sorted cells" + "Flow-sortedcells" ] } - }, - "required": [ - "biospecimenSubtype" - ] + } }, "then": { "properties": { - "cellType": {} + "CellType": { + "not": { + "type": "null" + } + } }, "required": [ - "cellType" + "CellType" ] } }, { "if": { "properties": { - "biospecimenSubtype": { + "BiospecimenType": { "enum": [ - "cell suspension" + "Cellline" ] } - }, - "required": [ - "biospecimenSubtype" - ] + } }, "then": { "properties": { - "cellType": {} + "CellType": { + "not": { + "type": "null" + } + } }, "required": [ 
- "cellType" + "CellType" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "primary cell culture" + "Primarycellculture" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "cellOntologyID": {} + "CellType": { + "not": { + "type": "null" + } + } }, "required": [ - "cellOntologyID" + "CellType" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenSubtype": { "enum": [ - "cell line" + "Cellsuspension" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "cellOntologyID": {} + "UserDefinedCellType": { + "not": { + "type": "null" + } + } }, "required": [ - "cellOntologyID" + "UserDefinedCellType" ] } }, { "if": { "properties": { - "biospecimenSubtype": { + "BiospecimenSubtype": { "enum": [ - "flow-sorted cells" + "Flow-sortedcells" ] } - }, - "required": [ - "biospecimenSubtype" - ] + } }, "then": { "properties": { - "cellOntologyID": {} + "UserDefinedCellType": { + "not": { + "type": "null" + } + } }, "required": [ - "cellOntologyID" + "UserDefinedCellType" ] } }, { "if": { "properties": { - "biospecimenSubtype": { + "BiospecimenSubtype": { "enum": [ - "cell suspension" + "Flow-sortedcells" ] } - }, - "required": [ - "biospecimenSubtype" - ] + } }, "then": { "properties": { - "cellOntologyID": {} + "FACSPopulation": { + "not": { + "type": "null" + } + } }, "required": [ - "cellOntologyID" + "FACSPopulation" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "suction blister cells" + "Primarycellculture" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "anatomicalSite": { - "enum": [ - "right knee joint", - "left wrist joint", - "left knee joint", - "left ankle joint", - "right 2nd MTP joint", - "right wrist joint", - "left hip joint", - "unknown", - "left 2nd MCP joint", - "right 2nd MCP joint", - "other site", - "right ankle joint", - "right hip joint", - "right 3rd MCP 
joint", - "right 1st MTP joint" - ] + "PrimaryCellSource": { + "not": { + "type": "null" + } } }, "required": [ - "anatomicalSite" + "PrimaryCellSource" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "synovial fluid" + "Saliva" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "anatomicalSite": { - "enum": [ - "right knee joint", - "left wrist joint", - "left knee joint", - "left ankle joint", - "right 2nd MTP joint", - "right wrist joint", - "left hip joint", - "unknown", - "left 2nd MCP joint", - "right 2nd MCP joint", - "other site", - "right ankle joint", - "right hip joint", - "right 3rd MCP joint", - "right 1st MTP joint" - ] + "SalivaCollectionProcedure": { + "not": { + "type": "null" + } } }, "required": [ - "anatomicalSite" + "SalivaCollectionProcedure" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "skin swab" + "Skinbiopsy" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "anatomicalSite": { - "enum": [ - "right knee joint", - "left wrist joint", - "left knee joint", - "left ankle joint", - "right 2nd MTP joint", - "right wrist joint", - "left hip joint", - "unknown", - "left 2nd MCP joint", - "right 2nd MCP joint", - "other site", - "right ankle joint", - "right hip joint", - "right 3rd MCP joint", - "right 1st MTP joint" - ] + "AnatomicalSite": { + "not": { + "type": "null" + } } }, "required": [ - "anatomicalSite" + "AnatomicalSite" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "skin biopsy" + "Skinswab" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "anatomicalSite": { - "enum": [ - "right knee joint", - "left wrist joint", - "left knee joint", - "left ankle joint", - "right 2nd MTP joint", - "right wrist joint", - "left hip joint", - "unknown", - "left 2nd MCP joint", - "right 2nd MCP joint", - "other site", - 
"right ankle joint", - "right hip joint", - "right 3rd MCP joint", - "right 1st MTP joint" - ] + "AnatomicalSite": { + "not": { + "type": "null" + } } }, "required": [ - "anatomicalSite" + "AnatomicalSite" ] } }, { "if": { "properties": { - "primaryCellSource": { + "BiospecimenType": { "enum": [ - "synovial tissue" + "Suctionblistercells" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "anatomicalSite": { - "enum": [ - "right knee joint", - "left wrist joint", - "left knee joint", - "left ankle joint", - "right 2nd MTP joint", - "right wrist joint", - "left hip joint", - "unknown", - "left 2nd MCP joint", - "right 2nd MCP joint", - "other site", - "right ankle joint", - "right hip joint", - "right 3rd MCP joint", - "right 1st MTP joint" - ] + "AnatomicalSite": { + "not": { + "type": "null" + } } }, "required": [ - "anatomicalSite" + "AnatomicalSite" ] } }, { "if": { "properties": { - "primaryCellSource": { + "BiospecimenType": { "enum": [ - "synovial tissue" + "Suctionblisterfluid" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "anatomicalSite": { - "enum": [ - "right knee joint", - "left wrist joint", - "left knee joint", - "left ankle joint", - "right 2nd MTP joint", - "right wrist joint", - "left hip joint", - "unknown", - "left 2nd MCP joint", - "right 2nd MCP joint", - "other site", - "right ankle joint", - "right hip joint", - "right 3rd MCP joint", - "right 1st MTP joint" - ] + "AnatomicalSite": { + "not": { + "type": "null" + } } }, "required": [ - "anatomicalSite" + "AnatomicalSite" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "suction blister fluid" + "Synovialfluid" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "anatomicalSite": { - "enum": [ - "right knee joint", - "left wrist joint", - "left knee joint", - "left ankle joint", - "right 2nd MTP joint", - "right wrist joint", - "left hip joint", - 
"unknown", - "left 2nd MCP joint", - "right 2nd MCP joint", - "other site", - "right ankle joint", - "right hip joint", - "right 3rd MCP joint", - "right 1st MTP joint" - ] + "AnatomicalSite": { + "not": { + "type": "null" + } } }, "required": [ - "anatomicalSite" + "AnatomicalSite" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "suction blister cells" + "Synovialtissue" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "skinSiteStatus": { - "enum": [ - "non-lesional", - "lesional", - "healthy control", - "lesional proximal" - ] + "AnatomicalSite": { + "not": { + "type": "null" + } } }, "required": [ - "skinSiteStatus" + "AnatomicalSite" ] } }, { "if": { "properties": { - "biospecimenType": { + "PrimaryCellSource": { "enum": [ - "skin swab" + "Synovialtissue" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "skinSiteStatus": { - "enum": [ - "non-lesional", - "lesional", - "healthy control", - "lesional proximal" - ] + "AnatomicalSite": { + "not": { + "type": "null" + } } }, "required": [ - "skinSiteStatus" + "AnatomicalSite" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "skin biopsy" + "Skinbiopsy" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "skinSiteStatus": { - "enum": [ - "non-lesional", - "lesional", - "healthy control", - "lesional proximal" - ] + "SkinSiteStatus": { + "not": { + "type": "null" + } } }, "required": [ - "skinSiteStatus" + "SkinSiteStatus" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "suction blister fluid" + "Skinswab" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "skinSiteStatus": { - "enum": [ - "non-lesional", - "lesional", - "healthy control", - "lesional proximal" - ] + "SkinSiteStatus": { + "not": { + "type": "null" + } } }, "required": [ - "skinSiteStatus" + 
"SkinSiteStatus" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "synovial fluid" + "Suctionblistercells" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "synovialCollectionProcedure": { - "enum": [ - "synovectomy", - "biopsy", - "unknown", - "arthroplasty" - ] + "SkinSiteStatus": { + "not": { + "type": "null" + } } }, "required": [ - "synovialCollectionProcedure" + "SkinSiteStatus" ] } }, { "if": { "properties": { - "primaryCellSource": { + "BiospecimenType": { "enum": [ - "synovial tissue" + "Suctionblisterfluid" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "synovialCollectionProcedure": { - "enum": [ - "synovectomy", - "biopsy", - "unknown", - "arthroplasty" - ] + "SkinSiteStatus": { + "not": { + "type": "null" + } } }, "required": [ - "synovialCollectionProcedure" + "SkinSiteStatus" ] } }, { "if": { "properties": { - "primaryCellSource": { + "BiospecimenType": { "enum": [ - "synovial tissue" + "Synovialfluid" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "synovialCollectionProcedure": { - "enum": [ - "synovectomy", - "biopsy", - "unknown", - "arthroplasty" - ] + "SynovialCollectionProcedure": { + "not": { + "type": "null" + } } }, "required": [ - "synovialCollectionProcedure" + "SynovialCollectionProcedure" ] } }, { "if": { "properties": { - "biospecimenType": { + "BiospecimenType": { "enum": [ - "saliva" + "Synovialtissue" ] } - }, - "required": [ - "biospecimenType" - ] + } }, "then": { "properties": { - "salivaCollectionProcedure": { - "enum": [ - "stimulated", - "unstimulated" - ] + "SynovialCollectionProcedure": { + "not": { + "type": "null" + } } }, "required": [ - "salivaCollectionProcedure" + "SynovialCollectionProcedure" ] } }, { "if": { "properties": { - "primaryCellSource": { + "PrimaryCellSource": { "enum": [ - "synovial tissue" + "Synovialtissue" ] } - }, - "required": [ - 
"primaryCellSource" - ] + } }, "then": { "properties": { - "krennLining": {} + "SynovialCollectionProcedure": { + "not": { + "type": "null" + } + } }, "required": [ - "krennLining" + "SynovialCollectionProcedure" ] } }, { "if": { "properties": { - "primaryCellSource": { + "BiospecimenType": { "enum": [ - "synovial tissue" + "Synovialtissue" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "krennLining": {} + "KrennInflammatory": { + "not": { + "type": "null" + } + } }, "required": [ - "krennLining" + "KrennInflammatory" ] } }, { "if": { "properties": { - "primaryCellSource": { + "PrimaryCellSource": { "enum": [ - "synovial tissue" + "Synovialtissue" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "krennInflammatory": {} + "KrennInflammatory": { + "not": { + "type": "null" + } + } }, "required": [ - "krennInflammatory" + "KrennInflammatory" ] } }, { "if": { "properties": { - "primaryCellSource": { + "BiospecimenType": { "enum": [ - "synovial tissue" + "Synovialtissue" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "krennInflammatory": {} + "KrennLining": { + "not": { + "type": "null" + } + } }, "required": [ - "krennInflammatory" + "KrennLining" ] } }, { "if": { "properties": { - "primaryCellSource": { + "PrimaryCellSource": { "enum": [ - "synovial tissue" + "Synovialtissue" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "krennSynovitisScore": {} + "KrennLining": { + "not": { + "type": "null" + } + } }, "required": [ - "krennSynovitisScore" + "KrennLining" ] } }, { "if": { "properties": { - "primaryCellSource": { + "BiospecimenType": { "enum": [ - "synovial tissue" + "Synovialtissue" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "krennSynovitisScore": {} + "KrennStroma": { + "not": { + "type": "null" + } + } }, "required": [ - "krennSynovitisScore" + "KrennStroma" ] } }, { 
"if": { "properties": { - "primaryCellSource": { + "PrimaryCellSource": { "enum": [ - "synovial tissue" + "Synovialtissue" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "krennStroma": {} + "KrennStroma": { + "not": { + "type": "null" + } + } }, "required": [ - "krennStroma" + "KrennStroma" ] } }, { "if": { "properties": { - "primaryCellSource": { + "BiospecimenType": { "enum": [ - "synovial tissue" + "Synovialtissue" ] } - }, - "required": [ - "primaryCellSource" - ] + } }, "then": { "properties": { - "krennStroma": {} + "KrennSynovitisScore": { + "not": { + "type": "null" + } + } }, "required": [ - "krennStroma" + "KrennSynovitisScore" ] } }, { "if": { "properties": { - "biospecimenSubtype": { + "PrimaryCellSource": { "enum": [ - "flow-sorted cells" + "Synovialtissue" ] } - }, - "required": [ - "biospecimenSubtype" - ] + } }, "then": { "properties": { - "FACSPopulation": { + "KrennSynovitisScore": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "FACSPopulation" + "KrennSynovitisScore" ] } }, { "if": { "properties": { - "biospecimenSubtype": { + "Program": { "enum": [ - "flow-sorted cells" + "AMPAIM" ] } - }, - "required": [ - "biospecimenSubtype" - ] + } }, "then": { "properties": { - "userDefinedCellType": {} + "VisitID": { + "not": { + "type": "null" + } + } }, "required": [ - "userDefinedCellType" + "VisitID" ] } }, { "if": { "properties": { - "biospecimenSubtype": { + "Program": { "enum": [ - "cell suspension" + "AMPRA/SLE" ] } - }, - "required": [ - "biospecimenSubtype" - ] + } }, "then": { "properties": { - "userDefinedCellType": {} + "VisitID": { + "not": { + "type": "null" + } + } }, "required": [ - "userDefinedCellType" + "VisitID" ] } } - ] + ], + "description": "A template outlining metadata to be collected for biospecimen used for an in vitro experiment.", + "properties": { + "AltSampleID": { + "description": "An alternate identifier for a sample. 
With some assays there can be default or alternate sample identifiers entered into the data collection software. If you will be uploading data collected with an alternate identifier please provide that here. In some cases, the ARK BDM team will use this field to capture original but out-dated identifiers for samples.", + "title": "altSampleID", + "type": "string" + }, + "AnatomicalSite": { + "description": "The anatomical site, i.e., location on or within the body, from which the biospecimen was collected. This attribute is required depending on the value selected for biospecimenType.", + "enum": [ + "left 2nd MCP joint", + "left ankle joint", + "left hip joint", + "left knee joint", + "left wrist joint", + "other site", + "right 1st MTP joint", + "right 2nd MCP joint", + "right 2nd MTP joint", + "right 3rd MCP joint", + "right ankle joint", + "right hip joint", + "right knee joint", + "right wrist joint", + "unknown" + ], + "title": "anatomicalSite" + }, + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.", + "title": "biospecimenID", + "type": "string" + }, + "BiospecimenSubtype": { + "description": "Biospecimen status before sample is processed into a scRNA-seq library. Several scRNA-seq technologies support a variety of sample processing methods which can introduces sources of technical variation.", + "enum": [ + "FFPE tissue", + "PFA-fixed tissue", + "cell or tissue lysate", + "cell suspension", + "flow-sorted cells", + "fresh tissue", + "frozen tissue", + "nuclei suspension", + "supernatant" + ], + "title": "biospecimenSubtype" + }, + "BiospecimenType": { + "description": "A label indicating the biological material collected for experimentation and data collection. 
Where applicable, provide all types in a comma-separated list.", + "enum": [ + "PBMCs", + "cell line", + "fibroblast-like synoviocyte", + "kidney biopsy", + "none", + "plasma", + "primary cell culture", + "saliva", + "salivary gland", + "serum", + "skin biopsy", + "skin swab", + "stool", + "suction blister cells", + "suction blister fluid", + "synovial fluid", + "synovial tissue", + "total leukocytes", + "urine", + "uvea", + "whole blood" + ], + "title": "biospecimenType" + }, + "CellOntologyID": { + "description": "Cell Ontology CL identifier that best describes a biopsecimen used to generate data, e.g., CL:0000084 for T cell.", + "pattern": "^CL:", + "title": "cellOntologyID", + "type": "string" + }, + "CellType": { + "description": "The cell type name from Cell Ontology for the corresponding CL identifier.", + "title": "cellType", + "type": "string" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "FACSPopulation": { + "description": "A description of the marker gating strategy used to derive the population cells with FACS.", + "title": "FACSPopulation", + "type": "string" + }, + "IndividualID": { + "description": "Unique identifier assigned to each study participant. For multi-specimen data files provide all IDs in a comma-separated list.", + "title": "individualID", + "type": "string" + }, + "KrennInflammatory": { + "description": "A standardized, semi-quantitative measure of the degree of inflammatory infiltrate in synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. 
If value unknown, enter '-1'.", + "title": "krennInflammatory", + "type": "number" + }, + "KrennLining": { + "description": "A standardized, semi-quantitative measure of the degree of hyperplasia/enlargement of the synovial lining layer of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.", + "title": "krennLining", + "type": "number" + }, + "KrennStroma": { + "description": "A standardized, semi-quantitative measure of stromal cell density of synovial specimen. This is one of three measures used to derive the Krenn Synovitis Score (KSS) which is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, enter '-1'.", + "title": "krennStroma", + "type": "number" + }, + "KrennSynovitisScore": { + "description": "The Krenn Synovitis Score (KSS) is a histological scoring system used to quantify inflammation in synovial tissue biopsies, particularly in the context of inflammatory arthritis like rheumatoid arthritis. If value unknown, use '-1'.", + "title": "krennSynovitisScore", + "type": "number" + }, + "Notes": { + "description": "Please use this attribute to capture pertinent details not otherwise captured. 
This is an unstructured text entry, please be concise.", + "title": "notes", + "type": "string" + }, + "ParentBiospecimenID": { + "description": "The biospecimenID associated with the originating biospecimen for derived or child biospecimens.", + "title": "parentBiospecimenID", + "type": "string" + }, + "PrimaryCellSource": { + "description": "A label indicating the biological source material from which a primary cell culture was derived.", + "enum": [ + "PBMCs", + "kidney", + "pannus-derived dermis", + "pannus-derived epidermis", + "salivary gland", + "synovial tissue", + "total leukocytes", + "urine", + "uvea", + "whole blood" + ], + "title": "primaryCellSource" + }, + "Program": { + "description": "Name of the funding program that supported the generation of data and associated files", + "enum": [ + "AMP AIM", + "AMP RA/SLE", + "Community Contribution" + ], + "title": "program" + }, + "Project": { + "description": "A sub-level attribute of `program` specifying a research initiative working to investigate particular hypotheses.", + "enum": [ + "AIM for RA", + "ELLIPSS", + "LOCKIT", + "RA", + "SLE", + "STAMP", + "UMass V-CoRT" + ], + "title": "project" + }, + "SalivaCollectionProcedure": { + "description": "Classification of saliva collection procedure. 
This is a conditionally dependent attribute triggered for saliva `biospecimenType`.", + "enum": [ + "stimulated", + "unstimulated" + ], + "title": "salivaCollectionProcedure" + }, + "SampleCollectionBatch": { + "description": "A label indicating batching of sample collection or experiment execution that occurs prior to data collection.", + "title": "sampleCollectionBatch", + "type": "string" + }, + "SkinSiteStatus": { + "description": "Disease manifestation status of skin biospecimen.", + "enum": [ + "healthy control", + "lesional", + "lesional proximal", + "non-lesional" + ], + "title": "skinSiteStatus" + }, + "SynovialCollectionProcedure": { + "description": "Classification of procedure for synovial tissue collection.", + "enum": [ + "arthroplasty", + "biopsy", + "synovectomy", + "unknown" + ], + "title": "synovialCollectionProcedure" + }, + "Treatment": { + "description": "A short descriptive label indicating what experimental treatments have been applied to a biospecimen before data capture. Please include the word 'control' to indicate specific treatments that are intended to serve as a control group.", + "title": "treatment", + "type": "string" + }, + "TreatmentTimepoint": { + "description": "Where applicable, specify the timepoint relative to treatment to distinguish different sets of samples that have undergone the same treatment but for different lengths of time. REQUIRED: please specify the unit of time, e.g., secs, mins, hrs, days, etc.", + "title": "treatmentTimepoint", + "type": "string" + }, + "UserDefinedCellType": { + "description": "User-defined label of a cell type. 
Contributors are provided this option to use preferred or custom cell type labels that may vary from options and standards set by Cell Ontology.", + "title": "userDefinedCellType", + "type": "string" + }, + "VisitID": { + "description": "Ordinal ID distinguishing different patient visits.", + "title": "visitID", + "type": "string" + } + }, + "required": [ + "BiospecimenID", + "BiospecimenType", + "Component", + "IndividualID", + "Program", + "Project", + "Treatment" + ], + "title": "InVitroBiospecimenMetadataTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.OlinkAssayMetadataTemplate.schema.json b/model_json_schema/ark.OlinkAssayMetadataTemplate.schema.json index 735b1cd6..60365ad2 100644 --- a/model_json_schema/ark.OlinkAssayMetadataTemplate.schema.json +++ b/model_json_schema/ark.OlinkAssayMetadataTemplate.schema.json @@ -1,64 +1,72 @@ { + "$id": "http://example.com/OlinkAssayMetadataTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", + "description": "A template outlining assay metadata to be collected for each plate in an Olink dataset.", "properties": { - "targetPanelSynID": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "targetPanel": { - "not": { - "type": "null" - }, - "minLength": 1 + "Assay": { + "description": "The technology used to generate the data in this file.", + "enum": [ + "Olink Explore HT", + "Olink Flex", + "Olink Focus", + "Olink Reveal", + "Olink Target 48", + "Olink Target 96" + ], + "title": "assay" }, "Component": { - "not": { - "type": "null" + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "PlateID": { + "description": "An identifier assigned to a multi-well plate. Certain data types and assays profile samples using multi-well plates. 
Knowing which samples were profiled on each plate is important for establishing sample provenance, finding the right data files for a specific set of samples, as well as downstream exploratory data analysis and QC work particularly regarding identification and correction of batch effects.", + "items": { + "type": "string" }, - "minLength": 1 + "title": "plateID", + "type": "array" }, - "targetPanelSize": {}, - "platform": { - "type": "array", + "Platform": { + "description": "The specific instrument (manufacturer, model, etc.) that was used to carry out a laboratory or computational experiment.", "items": { "enum": [ - "Illumina NovaSeq 6000", "Fluidigm BioMark", - "Olink Signature Q100", - "unknown", "Illumina NextSeq 500", - "" - ] + "Illumina NovaSeq 6000", + "Olink Signature Q100", + "unknown" + ], + "type": "string" }, - "maxItems": 5 + "title": "platform", + "type": "array" }, - "plateID": { - "not": { - "type": "null" - }, - "minLength": 1 + "TargetPanel": { + "description": "A unique or established human-readable name assigned to the panel of targets profiled in the experiment. For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.", + "title": "targetPanel", + "type": "string" }, - "assay": { - "enum": [ - "Olink Flex", - "Olink Target 96", - "Olink Focus", - "Olink Reveal", - "Olink Explore HT", - "Olink Target 48" - ] + "TargetPanelSize": { + "description": "The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. 
The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).", + "title": "targetPanelSize", + "type": "integer" + }, + "TargetPanelSynID": { + "description": "In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. This attribute links experimental data files to the target panel metadata via the synapse ID of that file.", + "pattern": "^syn[0-9]{8}", + "title": "targetPanelSynID", + "type": "string" } }, "required": [ - "targetPanelSynID", - "targetPanel", + "Assay", "Component", - "plateID", - "assay" - ] + "PlateID", + "TargetPanel", + "TargetPanelSynID" + ], + "title": "OlinkAssayMetadataTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.OlinkFileAnnotationTemplate.schema.json b/model_json_schema/ark.OlinkFileAnnotationTemplate.schema.json index 652bfc34..ba4cc0b0 100644 --- a/model_json_schema/ark.OlinkFileAnnotationTemplate.schema.json +++ b/model_json_schema/ark.OlinkFileAnnotationTemplate.schema.json @@ -1,78 +1,86 @@ { + "$id": "http://example.com/OlinkFileAnnotationTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", + "description": "A template outlining metadata to be provided by contributors that is applied as a preliminary set of annotations to Olink dataset files.", "properties": { - "fileFormat": { + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "FileFormat": { + "description": "Standard file format name or file extension", "enum": [ - "csv", - "geojson", - "svs", - "mtx", - "fastq", - "fcs", - "xlsx", - "tgz", - "czi", - "tsv", "bai", + "bam", "bed", - "pdf", + "bim", + "csv", + "czi", "docx", - "tbi", - "zip", + "dose", "erate", - "rds", + 
"fam", + "fastq", + "fcs", + "geojson", "h5", - "vcf", - "parquet", - "xls", - "rec", "h5ad", + "info", "mcd", - "txt", + "mtx", + "parquet", + "pdf", "py", - "bam", - "bim", - "dose", - "info", - "fam" - ] - }, - "specimenModality": { - "enum": [ - "unknown", - "multispecimen", - "single specimen" - ] + "rds", + "rec", + "svs", + "tbi", + "tgz", + "tsv", + "txt", + "vcf", + "xls", + "xlsx", + "zip" + ], + "title": "fileFormat" }, - "plateID": { - "not": { - "type": "null" + "PlateID": { + "description": "An identifier assigned to a multi-well plate. Certain data types and assays profile samples using multi-well plates. Knowing which samples were profiled on each plate is important for establishing sample provenance, finding the right data files for a specific set of samples, as well as downstream exploratory data analysis and QC work particularly regarding identification and correction of batch effects.", + "items": { + "type": "string" }, - "minLength": 1 + "title": "plateID", + "type": "array" }, - "resourceType": { + "ResourceType": { + "description": "High-level classification of the file content", "enum": [ - "metadata", "code", "experimental data", - "figure" - ] + "figure", + "metadata" + ], + "title": "resourceType" }, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 + "SpecimenModality": { + "description": "Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens", + "enum": [ + "multispecimen", + "single specimen", + "unknown" + ], + "title": "specimenModality" } }, "required": [ - "fileFormat", - "specimenModality", - "plateID", - "resourceType", - "Component" - ] + "Component", + "FileFormat", + "PlateID", + "ResourceType", + "SpecimenModality" + ], + "title": "OlinkFileAnnotationTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.PublicationMetadataTemplate.schema.json 
b/model_json_schema/ark.PublicationMetadataTemplate.schema.json index 8e29be21..17c210bc 100644 --- a/model_json_schema/ark.PublicationMetadataTemplate.schema.json +++ b/model_json_schema/ark.PublicationMetadataTemplate.schema.json @@ -1,137 +1,147 @@ { + "$id": "http://example.com/PublicationMetadataTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", + "allOf": [ + { + "if": { + "properties": { + "Program": { + "enum": [ + "AMPRA/SLE" + ] + } + } + }, + "then": { + "properties": { + "ProgramPhase": { + "not": { + "type": "null" + } + } + }, + "required": [ + "ProgramPhase" + ] + } + } + ], + "description": "A template outlining metadata to use as annotations for Publication ‘file’ entities.", "properties": { - "PMID": { - "not": { - "type": "null" + "AssociatedDataset": { + "description": "The synID of a Dataset entity. This serves to link other Synapse entities to Dataset entities. When used to annotate a publication Dataset this attribute should include the synID for an experimental Datasets from which the publication data was derived. 
Multiple synID can be specified using a comma-delimited list.", + "items": { + "pattern": "^syn[0-9]{8}", + "type": "string" }, - "minLength": 1 + "pattern": "^syn[0-9]{8}", + "title": "associatedDataset", + "type": "array" }, - "associatedDataset": {}, "Component": { - "not": { - "type": "null" - }, - "minLength": 1 + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" }, - "publicationDate": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "publicationType": { - "enum": [ - "correction", - "pre-print", - "peer-reviewed" - ] + "DOI": { + "description": "Digital object identifier", + "format": "uri-reference", + "title": "DOI", + "type": "string" }, - "title": { - "not": { - "type": "null" - }, - "minLength": 1 + "Journal": { + "description": "Journal in which the publication was released", + "title": "journal", + "type": "string" }, - "DOI": { - "not": { - "type": "null" - }, - "minLength": 1 + "PMCID": { + "description": "Pubmed Central Identifier, formatted as a compact URI string that will automatically resolve into a URL based on the Synapse bioregistry; e.g., pmc:PMCxxxxxxxx", + "pattern": "^pmc:PMC[0-9]{8}", + "title": "PMCID", + "type": "string" }, - "PMCID": {}, - "year": { - "not": { - "type": "null" - }, - "minLength": 1 + "PMID": { + "description": "PubMed(R) Identifier", + "pattern": "^PMID", + "title": "PMID", + "type": "string" }, - "program": { + "Program": { + "description": "Name of the funding program that supported the generation of data and associated files", "enum": [ "AMP AIM", - "Community Contribution", - "AMP RA/SLE" - ] + "AMP RA/SLE", + "Community Contribution" + ], + "title": "program" }, - "journal": { - "not": { - "type": "null" + "ProgramPhase": { + "description": "A label noting which AMP RA/SLE program phase generated the data.", + "items": { + "enum": [ + "I", + "II" + ], + "type": "string" }, - "minLength": 1 + "title": "programPhase", + "type": 
"array" }, - "project": { - "type": "array", + "Project": { + "description": "A sub-level attribute of `program` specifying a research initiative working to investigate particular hypotheses.", "items": { "enum": [ + "AIM for RA", "ELLIPSS", - "SLE", "LOCKIT", - "STAMP", "RA", - "AIM for RA", + "SLE", + "STAMP", "UMass V-CoRT" - ] + ], + "type": "string" }, - "maxItems": 7 + "title": "project", + "type": "array" }, - "programPhase": { - "type": "array", - "items": { - "enum": [ - "I", - "II", - "" - ] - }, - "maxItems": 2 + "PublicationDate": { + "description": "The publication date extracted from PubMed database", + "title": "publicationDate", + "type": "string" + }, + "PublicationType": { + "description": "General classification of publication.", + "enum": [ + "correction", + "peer-reviewed", + "pre-print" + ], + "title": "publicationType" + }, + "Title": { + "description": "Title of the publication.", + "title": "title", + "type": "string" + }, + "Year": { + "description": "Year (YYYY) in which the paper was published.", + "pattern": "[1-2][0-9]{3}", + "title": "year", + "type": "string" } }, "required": [ - "PMID", "Component", - "publicationDate", - "publicationType", - "title", "DOI", - "year", - "program", - "journal", - "project" + "Journal", + "PMID", + "Program", + "Project", + "PublicationDate", + "PublicationType", + "Title", + "Year" ], - "allOf": [ - { - "if": { - "properties": { - "program": { - "enum": [ - "AMP RA/SLE" - ] - } - }, - "required": [ - "program" - ] - }, - "then": { - "properties": { - "programPhase": { - "type": "array", - "items": { - "enum": [ - "I", - "II" - ] - }, - "maxItems": 2 - } - }, - "required": [ - "programPhase" - ] - } - } - ] + "title": "PublicationMetadataTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.ScRNASeqAssayMetadataTemplate.schema.json b/model_json_schema/ark.ScRNASeqAssayMetadataTemplate.schema.json index 04779334..05db00ac 100644 --- 
a/model_json_schema/ark.ScRNASeqAssayMetadataTemplate.schema.json +++ b/model_json_schema/ark.ScRNASeqAssayMetadataTemplate.schema.json @@ -1,345 +1,368 @@ { + "$id": "http://example.com/ScRNASeqAssayMetadataTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", + "allOf": [ + { + "if": { + "properties": { + "LibraryPrepMethod": { + "enum": [ + "10xGEM-XFlexGeneExpressionHuman" + ] + } + } + }, + "then": { + "properties": { + "10xProbeSetReference": { + "not": { + "type": "null" + } + } + }, + "required": [ + "10xProbeSetReference" + ] + } + }, + { + "if": { + "properties": { + "SpecimenModality": { + "enum": [ + "Multispecimen" + ] + } + } + }, + "then": { + "properties": { + "LibraryID": { + "not": { + "type": "null" + } + } + }, + "required": [ + "LibraryID" + ] + } + }, + { + "if": { + "properties": { + "SpecimenModality": { + "enum": [ + "Singlespecimen" + ] + } + } + }, + "then": { + "properties": { + "BiospecimenID": { + "not": { + "type": "null" + } + } + }, + "required": [ + "BiospecimenID" + ] + } + } + ], + "description": "A template outlining metadata to be collected for each library in a scRNA-seq dataset.", "properties": { - "alignmentReference": { + "10xProbeSetReference": { + "description": "Name of probe set used in 10x Chromium Flex, Xenium, or Visium data. 
If custom modified probe set was used the probe reference should be included as metadata accompanying the experimental data files.", + "enum": [ + "Flex Human Transcriptome Probe Set v1.0.1", + "Flex Human Transcriptome Probe Set v1.1.0", + "Visium Human Transcriptome v1", + "Visium Human Transcriptome v2", + "custom probe set" + ], + "title": "10xProbeSetReference" + }, + "AlignmentReference": { + "description": "The genomic/transcriptomic reference used for performing read alignment against.", "enum": [ + "10x Cell Ranger Human GRCh38 2020-A", + "10x Cell Ranger Human GRCh38 2024-A", + "GRCh38", "modified GRCh38", "unknown", - "GRCh38", - "10x Cell Ranger Human GRCh38 2020-A", - "vdj_GRCh38_alts_ensembl-4.0.0", - "10x Cell Ranger Human GRCh38 2024-A" - ] + "vdj_GRCh38_alts_ensembl-4.0.0" + ], + "title": "alignmentReference" }, - "sampleProcessingBatch": {}, - "dataCollectionBatch": {}, - "assay": { - "type": "array", + "Assay": { + "description": "The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. 
e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.", "items": { "enum": [ - "Olink Reveal", - "Olink Flex", - "snRNASeq", - "Olink Explore HT", - "CyTOF", + "ASAPSeq", "CE-MS", + "CITESeq", + "CosMX", + "CyTOF", + "GenePS SeqFISH", + "H&E", + "LC-MS/MS", + "NULISA", + "Olink Explore HT", + "Olink Flex", + "Olink Focus", + "Olink Reveal", + "Olink Target 48", + "Olink Target 96", + "RNASeq", + "SNP array", + "SomaScan", + "VDJSeq", + "Visium", "WES", + "WGS", "Xenium", "feature barcode sequencing", "flow cytometry", - "RNASeq", - "WGS", - "SomaScan", - "kiloplex", - "GenePS SeqFISH", - "snATACSeq", - "VDJSeq", - "Olink Target 48", - "Olink Focus", - "NULISA", - "SNP array", - "imaging mass spectrometry", - "scRNASeq", - "Olink Target 96", - "H&E", - "LC-MS/MS", "imaging mass cytometry", - "CosMX", + "imaging mass spectrometry", + "kiloplex", "multiplexed ELISA", - "serial IHC", + "scRNASeq", "scVDJSeq", - "CITESeq", - "ASAPSeq", - "Visium" - ] + "serial IHC", + "snATACSeq", + "snRNASeq" + ], + "type": "string" }, - "maxItems": 34 + "title": "assay", + "type": "array" }, - "inputCellCount": { - "not": { - "type": "null" + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.", + "items": { + "type": "string" }, - "minLength": 1 + "title": "biospecimenID", + "type": "array" }, - "libraryPrepMethod": { + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "DataCollectionBatch": { + "description": "A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.", + "title": "dataCollectionBatch", + "type": "string" + }, + "InputCellCount": { + "description": "An estimate of the number of cells expected to be sequenced in a library. 
Software that process single-cell sequencing data often include options for users to specify this value to improve processing results.", + "title": "inputCellCount", + "type": "integer" + }, + "LibraryID": { + "description": "A library label or name, unique within an experiment, used to distinguish sequencing libraries.", + "title": "libraryID", + "type": "string" + }, + "LibraryPrepMethod": { + "description": "Sequencing library preparation method or kit used to create the library. If no commercially available kit was used, please select 'in-house library prep'.", "enum": [ + "10x Chromium Fixed RNA Human Transcriptome", "10x Chromium GEM-X Single Cell 3' v4", - "Nextera XT", + "10x Chromium GEM-X Single Cell 5' v3", + "10x Chromium Next GEM Single Cell 3'", + "10x Chromium Next GEM Single Cell 3' 3.1", "10x Chromium Next GEM Single Cell 5' v1.1", + "10x Chromium Next GEM Single Cell 5' v2", + "10x Chromium Next GEM Single Cell ATAC v2", + "10x Chromium Single Cell Human BCR", + "10x Chromium Single Cell Human TCR", + "10x GEM-X Flex Gene Expression Human", + "10x GEM-X Universal 5' Gene Expression v3", + "CEL-Seq2", + "Chromium Next GEM Single Cell ATAC v1.1", + "Fluidigm C1 HT", + "NEBNext Human Immune Sequencing Kit", + "NEBNext Ultra II Directional RNA Library", + "Nextera XT", "Nextera XT DNA", - "in-house library prep", + "QIAseq miRNA Library", + "SMART-Seq Human BCR with UMI", "SMART-Seq Human TCR with UMI", - "10x GEM-X Flex Gene Expression Human", + "SMART-Seq v4 Ultra Low Input RNA", "SMARTer Stranded Total RNA v2", - "10x Chromium Next GEM Single Cell 5' v2", - "10x Chromium Single Cell Human TCR", "Takara Human BCR profiling for Illumina", - "SMART-Seq Human BCR with UMI", - "TruSeq Stranded mRNA", - "NEBNext Ultra II Directional RNA Library", - "10x Chromium Next GEM Single Cell 3'", - "Fluidigm C1 HT", + "Takara Human TCR profiling for Illumina", "Takara Human TCRv2 profiling for Illumina", - "10x Chromium Next GEM Single Cell 3' 3.1", "Takara Human 
scTCR profiling for Illumina", + "TruSeq Stranded mRNA", "custom DASH-treatment", - "10x Chromium Fixed RNA Human Transcriptome", - "QIAseq miRNA Library", - "10x Chromium Single Cell Human BCR", - "CEL-Seq2", - "10x GEM-X Universal 5' Gene Expression v3", - "10x Chromium GEM-X Single Cell 5' v3", - "SMART-Seq v4 Ultra Low Input RNA", - "Chromium Next GEM Single Cell ATAC v1.1", - "10x Chromium Next GEM Single Cell ATAC v2", - "Takara Human TCR profiling for Illumina", - "NEBNext Human Immune Sequencing Kit" - ] + "in-house library prep" + ], + "title": "libraryPrepMethod" }, - "totalReads": { - "not": { - "type": "null" + "NucleicAcidSource": { + "description": "The source of the nucleic acid used as input for sequencing library fragments. Select all that apply, though in most cases only a single label is expected.", + "items": { + "enum": [ + "BCR mRNA", + "CRISPR protospacer feature barcode", + "TCR mRNA", + "Tn5-accessible gDNA", + "antigen capture barcode", + "gDNA", + "globin-depleted RNA", + "intracellular protein feature barcode", + "multiplexing oligo", + "poly(A) RNA", + "rRNA-depleted RNA", + "surface protein feature barcode" + ], + "type": "string" }, - "minLength": 1 + "title": "nucleicAcidSource", + "type": "array" }, - "sequencingSaturation": {}, - "softwareAndVersion": { - "enum": [ - "Cell Ranger v3.1.0", - "Space Ranger 3.0.1", - "Space Ranger 3.1.2", - "demuxlet", - "Cell Ranger v6.0.1", - "Cell Ranger v7.0.1", - "Cell Ranger ATAC v1.1.0", - "Cell Ranger v6.0.0", - "Cell Ranger v8.0.0", - "Cell Ranger v3.0.1", - "Cell Ranger v5.0.0", - "BD FACSDiva 8.0.1", - "Cell Ranger v3.0.2", - "Cell Ranger v9.0.0", - "Cell Ranger v4.0.0", - "Space Ranger 3.0.0", - "Cell Ranger v6.1.0", - "Space Ranger 3.1.3", - "Cell Ranger v5.0.1", - "Cell Ranger v7.2.0", - "Cell Ranger v6.0.2", - "Cell Ranger v3.0.0", - "Cell Ranger v7.0.0", - "Cell Ranger v7.1.0", - "Cell Ranger v6.1.2", - "Cell Ranger 9.0.1", - "Cell Ranger v8.0.1", - "Space Ranger 3.1.0", - "Space 
Ranger 3.1.1", - "Cell Ranger v6.1.1", - "" - ] - }, - "specimenModality": { - "enum": [ - "unknown", - "single specimen", - "multispecimen" - ] + "PercentCellViability": { + "description": "A measure of the proportion of viable cells within a cell suspension. Scale is 0-100.", + "maximum": 100.0, + "minimum": 50.0, + "title": "percentCellViability", + "type": "integer" }, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "platform": { - "type": "array", + "Platform": { + "description": "The specific version (manufacturer, model, etc.) of a technology that is used to carry out a laboratory or computational experiment. Specify where applicable for experimental data files, else enter 'none'. In most cases only a single label is expected, however multiple selections can be provided in comma-delimited list where applicable e.g., for 10x Genomics fastq files please specify both the 10x instrument and the sequencing platform.", "items": { "enum": [ - "Thermo Fisher Attune NxT", - "BD FACSLyric Clinical", + "BD FACSAria Fusion cell sorter", "BD FACSAria III", + "BD FACSCanto", "BD FACSCanto II", - "Chromium Controller", - "Thermo Fisher Attune Xenith", - "Chromium X", - "Cytek Aurora Evo", - "Illumina NovaSeq 6000", - "Xenium", - "BD FACSymphony S6", - "Sony MA900", - "Helios Mass Cytometer", - "Thermo Fisher Attune CytPix", - "Illumina NovaSeq X", + "BD FACSDiscover A8", + "BD FACSDiscover S8", + "BD FACSLyric Clinical", "BD FACSMelody", - "CyTOF XT", + "BD FACSymphony S6", + "BD LSRFortessa", + "Chromium Controller", "Chromium GEM-X Single Cell 3' Chip v4", - "Chromium Xo", "Chromium Next GEM Chip G", - "Illumina HiSeq 2500", + "Chromium Next GEM Chip H", + "Chromium Next GEM Chip K", + "Chromium Next GEM Chip M", "Chromium Next GEM Chip Q", + "Chromium X", + "Chromium Xo", + "Chromium iX", + "CyTOF XT", "Cytek Aurora", - "Illumina NextSeq 500", - "unknown", - "Chromium Next GEM Chip K", + "Cytek Aurora Evo", "Fluidigm BioMark", - "BD FACSCanto", 
- "BD FACSDiscover A8", - "Chromium iX", - "Illumina HiSeq X Ten", - "Olink Signature Q100", - "none", - "Chromium Next GEM Chip H", - "Chromium Next GEM Chip M", "GEM-X Flex Gene Expression Chip", - "BD FACSDiscover S8", - "Visium CytAssist", - "Not Applicable", "GEM-X OCM 5' Chip", - "BD FACSAria Fusion cell sorter", + "Helios Mass Cytometer", "Hyperion", - "BD LSRFortessa" - ] + "Illumina HiSeq 2500", + "Illumina HiSeq X Ten", + "Illumina NextSeq 500", + "Illumina NovaSeq 6000", + "Illumina NovaSeq X", + "Not Applicable", + "Olink Signature Q100", + "Sony MA900", + "Thermo Fisher Attune CytPix", + "Thermo Fisher Attune NxT", + "Thermo Fisher Attune Xenith", + "Visium CytAssist", + "Xenium", + "none", + "unknown" + ], + "type": "string" }, - "maxItems": 43 + "title": "platform", + "type": "array" }, - "nucleicAcidSource": { - "type": "array", - "items": { - "enum": [ - "poly(A) RNA", - "CRISPR protospacer feature barcode", - "gDNA", - "TCR mRNA", - "multiplexing oligo", - "BCR mRNA", - "globin-depleted RNA", - "intracellular protein feature barcode", - "surface protein feature barcode", - "Tn5-accessible gDNA", - "rRNA-depleted RNA", - "antigen capture barcode" - ] - }, - "maxItems": 12 + "SampleProcessingBatch": { + "description": "A label indicating batching of sample processing or preparation that occurs prior to data collection.", + "title": "sampleProcessingBatch", + "type": "string" }, - "percentCellViability": { - "not": { - "type": "null" - }, - "minLength": 1 + "SequencingSaturation": { + "description": "A measure of the fraction of library complexity that was sequenced in a library. This metric quantifies the fraction of reads originating from an already-observed UMI. More specifically, this is the fraction of confidently mapped, valid cell-barcode, valid UMI reads that are non-unique. 
Scale is 0-1.", + "maximum": 1.0, + "minimum": 0.0, + "title": "sequencingSaturation", + "type": "number" }, - "10xProbeSetReference": { + "SoftwareAndVersion": { + "description": "Relevant software and version used to generate the data file.", "enum": [ - "Flex Human Transcriptome Probe Set v1.1.0", - "Flex Human Transcriptome Probe Set v1.0.1", - "custom probe set", - "Visium Human Transcriptome v1", - "Visium Human Transcriptome v2", - "" - ] + "BD FACSDiva 8.0.1", + "Cell Ranger 9.0.1", + "Cell Ranger ATAC v1.1.0", + "Cell Ranger v3.0.0", + "Cell Ranger v3.0.1", + "Cell Ranger v3.0.2", + "Cell Ranger v3.1.0", + "Cell Ranger v4.0.0", + "Cell Ranger v5.0.0", + "Cell Ranger v5.0.1", + "Cell Ranger v6.0.0", + "Cell Ranger v6.0.1", + "Cell Ranger v6.0.2", + "Cell Ranger v6.1.0", + "Cell Ranger v6.1.1", + "Cell Ranger v6.1.2", + "Cell Ranger v7.0.0", + "Cell Ranger v7.0.1", + "Cell Ranger v7.1.0", + "Cell Ranger v7.2.0", + "Cell Ranger v8.0.0", + "Cell Ranger v8.0.1", + "Cell Ranger v9.0.0", + "Space Ranger 3.0.0", + "Space Ranger 3.0.1", + "Space Ranger 3.1.0", + "Space Ranger 3.1.1", + "Space Ranger 3.1.2", + "Space Ranger 3.1.3", + "demuxlet" + ], + "title": "softwareAndVersion" + }, + "SpecimenModality": { + "description": "Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens", + "enum": [ + "multispecimen", + "single specimen", + "unknown" + ], + "title": "specimenModality" }, - "biospecimenID": {}, - "libraryID": {} + "TotalReads": { + "description": "Total number of reads sequenced from the library.", + "title": "totalReads", + "type": "integer" + } }, "required": [ - "alignmentReference", - "assay", - "inputCellCount", - "libraryPrepMethod", - "totalReads", - "specimenModality", + "AlignmentReference", + "Assay", "Component", - "platform", - "nucleicAcidSource", - "percentCellViability" + "InputCellCount", + "LibraryPrepMethod", + "NucleicAcidSource", + "PercentCellViability", + 
"Platform", + "SpecimenModality", + "TotalReads" ], - "allOf": [ - { - "if": { - "properties": { - "libraryPrepMethod": { - "enum": [ - "10x GEM-X Flex Gene Expression Human" - ] - } - }, - "required": [ - "libraryPrepMethod" - ] - }, - "then": { - "properties": { - "10xProbeSetReference": { - "enum": [ - "Flex Human Transcriptome Probe Set v1.1.0", - "Flex Human Transcriptome Probe Set v1.0.1", - "custom probe set", - "Visium Human Transcriptome v1", - "Visium Human Transcriptome v2" - ] - } - }, - "required": [ - "10xProbeSetReference" - ] - } - }, - { - "if": { - "properties": { - "specimenModality": { - "enum": [ - "single specimen" - ] - } - }, - "required": [ - "specimenModality" - ] - }, - "then": { - "properties": { - "biospecimenID": { - "not": { - "type": "null" - }, - "minLength": 1 - } - }, - "required": [ - "biospecimenID" - ] - } - }, - { - "if": { - "properties": { - "specimenModality": { - "enum": [ - "multispecimen" - ] - } - }, - "required": [ - "specimenModality" - ] - }, - "then": { - "properties": { - "libraryID": { - "not": { - "type": "null" - }, - "minLength": 1 - } - }, - "required": [ - "libraryID" - ] - } - } - ] + "title": "ScRNASeqAssayMetadataTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.ScRNASeqProcessedDataAnnotationTemplate.schema.json b/model_json_schema/ark.ScRNASeqProcessedDataAnnotationTemplate.schema.json index c50476a0..7e5c2299 100644 --- a/model_json_schema/ark.ScRNASeqProcessedDataAnnotationTemplate.schema.json +++ b/model_json_schema/ark.ScRNASeqProcessedDataAnnotationTemplate.schema.json @@ -1,387 +1,388 @@ { + "$id": "http://example.com/ScRNASeqProcessedDataAnnotationTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", - "properties": { - "resourceType": { - "type": "array", - "items": { - "enum": [ - "metadata", - "experimental data" - ] - }, - "maxItems": 2 - }, - "assay": { - "type": 
"array", - "items": { - "enum": [ - "NULISA", - "Olink Target 96", - "serial IHC", - "scRNASeq", - "imaging mass cytometry", - "Olink Target 48", - "Olink Focus", - "CyTOF", - "ASAPSeq", - "Olink Flex", - "H&E", - "flow cytometry", - "SomaScan", - "CosMX", - "Xenium", - "kiloplex", - "CITESeq", - "VDJSeq", - "Olink Reveal", - "Olink Explore HT", - "SNP array", - "feature barcode sequencing", - "LC-MS/MS", - "Visium", - "CE-MS", - "imaging mass spectrometry", - "WES", - "scVDJSeq", - "multiplexed ELISA", - "snATACSeq", - "snRNASeq", - "RNASeq", - "GenePS SeqFISH", - "WGS" - ] - }, - "maxItems": 34 - }, - "fileFormat": { - "enum": [ - "bai", - "fam", - "bim", - "bam", - "mtx", - "tgz", - "xlsx", - "xls", - "txt", - "bed", - "h5", - "tsv", - "csv", - "h5ad", - "Rds", - "zip" - ] - }, - "dataLevel": { - "enum": [ - "1", - "2", - "5", - "3", - "4" - ] - }, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "cellRangerOutput": { - "enum": [ - "raw MEX", - "filtered_peak_bc_matrix", - "filtered_feature_bc_matrix", - "Not Applicable", - "raw_feature_bc_matrix", - "raw_peak_bc_matrix", - "filtered MEX" - ] - }, - "specimenModality": { - "enum": [ - "multispecimen", - "single specimen", - "unknown" - ] - }, - "metadataType": { - "enum": [ - "single-cell metadata", - "" - ] - }, - "processedDataType": { - "type": "array", - "items": { - "enum": [ - "differential expression results", - "epigenomic peaks", - "gene counts", - "barcode counts", - "" - ] - }, - "maxItems": 4 - }, - "targetPanelSize": {}, - "targetPanelSynID": {}, - "targetPanel": {}, - "RObjectClass": { - "enum": [ - "Symphony reference", - "vector", - "data.frame", - "sparse matrix", - "ROCR prediction.object", - "list", - "matrix", - "SummarizedExperiment", - "Seurat object", - "" - ] - }, - "individualID": {}, - "biospecimenID": {} - }, - "required": [ - "resourceType", - "assay", - "fileFormat", - "dataLevel", - "Component", - "cellRangerOutput", - "specimenModality" - ], "allOf": [ { 
"if": { "properties": { - "resourceType": { + "Assay": { "enum": [ - "metadata" + "Featurebarcodesequencing" ] } - }, - "required": [ - "resourceType" - ] + } }, "then": { "properties": { - "metadataType": { - "enum": [ - "single-cell metadata" - ] + "TargetPanel": { + "not": { + "type": "null" + } } }, "required": [ - "metadataType" + "TargetPanel" ] } }, { "if": { "properties": { - "resourceType": { + "Assay": { "enum": [ - "experimental data" + "Featurebarcodesequencing" ] } - }, - "required": [ - "resourceType" - ] + } }, "then": { "properties": { - "processedDataType": { - "type": "array", - "items": { - "enum": [ - "differential expression results", - "epigenomic peaks", - "gene counts", - "barcode counts", - "" - ] - }, - "maxItems": 4 + "TargetPanelSize": { + "not": { + "type": "null" + } } }, "required": [ - "processedDataType" + "TargetPanelSize" ] } }, { "if": { "properties": { - "assay": { + "Assay": { "enum": [ - "feature barcode sequencing" + "Featurebarcodesequencing" ] } - }, - "required": [ - "assay" - ] + } }, "then": { "properties": { - "targetPanelSize": {} + "TargetPanelSynID": { + "not": { + "type": "null" + } + } }, "required": [ - "targetPanelSize" + "TargetPanelSynID" ] } }, { "if": { "properties": { - "assay": { + "FileFormat": { "enum": [ - "feature barcode sequencing" + "Rds" ] } - }, - "required": [ - "assay" - ] + } }, "then": { "properties": { - "targetPanelSynID": { + "RObjectClass": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "targetPanelSynID" + "RObjectClass" ] } }, { "if": { "properties": { - "assay": { + "ResourceType": { "enum": [ - "feature barcode sequencing" + "Experimentaldata" ] } - }, - "required": [ - "assay" - ] + } }, "then": { "properties": { - "targetPanel": { + "ProcessedDataType": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "targetPanel" + "ProcessedDataType" ] } }, { "if": { "properties": { - "fileFormat": { + "ResourceType": { "enum": [ - "Rds" + 
"Metadata" ] } - }, - "required": [ - "fileFormat" - ] + } }, "then": { "properties": { - "RObjectClass": { - "enum": [ - "Symphony reference", - "vector", - "data.frame", - "sparse matrix", - "ROCR prediction.object", - "list", - "matrix", - "SummarizedExperiment", - "Seurat object", - "" - ] + "MetadataType": { + "not": { + "type": "null" + } } }, "required": [ - "RObjectClass" + "MetadataType" ] } }, { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "single specimen" + "Singlespecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "individualID": { + "BiospecimenID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "individualID" + "BiospecimenID" ] } }, { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "single specimen" + "Singlespecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "biospecimenID": { + "IndividualID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "biospecimenID" + "IndividualID" ] } } - ] + ], + "description": "A data contributor template outlining metadata to be collected as file annotations for scRNA-seq processed data files (i.e., anything not a fastq file).", + "properties": { + "Assay": { + "description": "The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. 
e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.", + "items": { + "enum": [ + "ASAPSeq", + "CE-MS", + "CITESeq", + "CosMX", + "CyTOF", + "GenePS SeqFISH", + "H&E", + "LC-MS/MS", + "NULISA", + "Olink Explore HT", + "Olink Flex", + "Olink Focus", + "Olink Reveal", + "Olink Target 48", + "Olink Target 96", + "RNASeq", + "SNP array", + "SomaScan", + "VDJSeq", + "Visium", + "WES", + "WGS", + "Xenium", + "feature barcode sequencing", + "flow cytometry", + "imaging mass cytometry", + "imaging mass spectrometry", + "kiloplex", + "multiplexed ELISA", + "scRNASeq", + "scVDJSeq", + "serial IHC", + "snATACSeq", + "snRNASeq" + ], + "type": "string" + }, + "title": "assay", + "type": "array" + }, + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.", + "items": { + "type": "string" + }, + "title": "biospecimenID", + "type": "array" + }, + "CellRangerOutput": { + "description": "10x Genomics Cell Ranger software output several different counts results and formats, some with different processing applied. This label distinguishes between these types and is particularly helpful when multiple files are uploaded with the sample name, e.g., barcodes.tsv.gz", + "enum": [ + "Not Applicable", + "filtered MEX", + "filtered_feature_bc_matrix", + "filtered_peak_bc_matrix", + "raw MEX", + "raw_feature_bc_matrix", + "raw_peak_bc_matrix" + ], + "title": "cellRangerOutput" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "DataLevel": { + "description": "Level of data processing applied to file. 
Levels refer to pre-defined standards of processing for the given assay.", + "enum": [ + "1", + "2", + "3", + "4", + "5" + ], + "title": "dataLevel" + }, + "FileFormat": { + "description": "Standard file format name or file extension", + "enum": [ + "bai", + "bam", + "bed", + "bim", + "csv", + "fam", + "h5", + "h5ad", + "mtx", + "rds", + "tgz", + "tsv", + "txt", + "xls", + "xlsx", + "zip" + ], + "title": "fileFormat" + }, + "IndividualID": { + "description": "Unique identifier assigned to each study participant. For multi-specimen data files provide all IDs in a comma-separated list.", + "items": { + "type": "string" + }, + "title": "individualID", + "type": "array" + }, + "MetadataType": { + "description": "A label further classifying the content of metadata resource.", + "enum": [ + "single-cell metadata" + ], + "title": "metadataType" + }, + "ProcessedDataType": { + "description": "A label used for file annotations to provide a brief description of the processed data file.", + "items": { + "enum": [ + "barcode counts", + "differential expression results", + "epigenomic peaks", + "gene counts" + ], + "type": "string" + }, + "title": "processedDataType", + "type": "array" + }, + "RObjectClass": { + "description": "Rds files store R objects, one per file. 
This label details the class of the R object saved to the Rds file or other similar file types.", + "enum": [ + "ROCR prediction.object", + "Seurat object", + "SummarizedExperiment", + "Symphony reference", + "data.frame", + "list", + "matrix", + "sparse matrix", + "vector" + ], + "title": "RObjectClass" + }, + "ResourceType": { + "description": "High-level classification of the file content", + "items": { + "enum": [ + "experimental data", + "metadata" + ], + "type": "string" + }, + "title": "resourceType", + "type": "array" + }, + "SpecimenModality": { + "description": "Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens", + "enum": [ + "multispecimen", + "single specimen", + "unknown" + ], + "title": "specimenModality" + }, + "TargetPanel": { + "description": "A unique or established human-readable name assigned to the panel of targets profiled in the experiment. For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.", + "title": "targetPanel", + "type": "string" + }, + "TargetPanelSize": { + "description": "The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).", + "title": "targetPanelSize", + "type": "integer" + }, + "TargetPanelSynID": { + "description": "In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. 
This attribute links experimental data files to the target panel metadata via the synapse ID of that file.", + "pattern": "^syn[0-9]{8}", + "title": "targetPanelSynID", + "type": "string" + } + }, + "required": [ + "Assay", + "CellRangerOutput", + "Component", + "DataLevel", + "FileFormat", + "ResourceType", + "SpecimenModality" + ], + "title": "ScRNASeqProcessedDataAnnotationTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.ScVDJSeqProcessedDataAnnotationTemplate.schema.json b/model_json_schema/ark.ScVDJSeqProcessedDataAnnotationTemplate.schema.json index f7083bd3..da5b0f22 100644 --- a/model_json_schema/ark.ScVDJSeqProcessedDataAnnotationTemplate.schema.json +++ b/model_json_schema/ark.ScVDJSeqProcessedDataAnnotationTemplate.schema.json @@ -1,375 +1,374 @@ { + "$id": "http://example.com/ScVDJSeqProcessedDataAnnotationTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", - "properties": { - "dataLevel": { - "enum": [ - "2", - "5", - "4", - "3", - "1" - ] - }, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "specimenModality": { - "enum": [ - "multispecimen", - "unknown", - "single specimen" - ] - }, - "fileFormat": { - "enum": [ - "h5ad", - "tsv", - "bai", - "zip", - "bam", - "fam", - "bim", - "mtx", - "Rds", - "h5", - "xlsx", - "xls", - "tgz", - "txt", - "bed", - "csv" - ] - }, - "assay": { - "type": "array", - "items": { - "enum": [ - "NULISA", - "SNP array", - "snRNASeq", - "snATACSeq", - "imaging mass spectrometry", - "scRNASeq", - "kiloplex", - "Olink Target 48", - "Visium", - "Olink Focus", - "imaging mass cytometry", - "serial IHC", - "RNASeq", - "Xenium", - "WES", - "flow cytometry", - "Olink Flex", - "CITESeq", - "GenePS SeqFISH", - "H&E", - "Olink Explore HT", - "Olink Reveal", - "feature barcode sequencing", - "scVDJSeq", - "CosMX", - "LC-MS/MS", - "CE-MS", - "Olink Target 96", - "VDJSeq", - 
"ASAPSeq", - "multiplexed ELISA", - "SomaScan", - "WGS", - "CyTOF" - ] - }, - "maxItems": 34 - }, - "resourceType": { - "type": "array", - "items": { - "enum": [ - "metadata", - "experimental data" - ] - }, - "maxItems": 2 - }, - "biospecimenID": {}, - "individualID": {}, - "RObjectClass": { - "enum": [ - "vector", - "ROCR prediction.object", - "SummarizedExperiment", - "matrix", - "Symphony reference", - "sparse matrix", - "list", - "data.frame", - "Seurat object", - "" - ] - }, - "targetPanelSize": {}, - "targetPanelSynID": {}, - "targetPanel": {}, - "metadataType": { - "enum": [ - "single-cell metadata", - "" - ] - }, - "processedDataType": { - "type": "array", - "items": { - "enum": [ - "barcode counts", - "gene counts", - "differential expression results", - "epigenomic peaks", - "" - ] - }, - "maxItems": 4 - } - }, - "required": [ - "dataLevel", - "Component", - "specimenModality", - "fileFormat", - "assay", - "resourceType" - ], "allOf": [ { "if": { "properties": { - "specimenModality": { + "Assay": { "enum": [ - "single specimen" + "Featurebarcodesequencing" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "biospecimenID": { + "TargetPanel": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "biospecimenID" + "TargetPanel" ] } }, { "if": { "properties": { - "specimenModality": { + "Assay": { "enum": [ - "single specimen" + "Featurebarcodesequencing" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "individualID": { + "TargetPanelSize": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "individualID" + "TargetPanelSize" ] } }, { "if": { "properties": { - "fileFormat": { + "Assay": { "enum": [ - "Rds" + "Featurebarcodesequencing" ] } - }, - "required": [ - "fileFormat" - ] + } }, "then": { "properties": { - "RObjectClass": { - "enum": [ - "vector", - "ROCR prediction.object", - "SummarizedExperiment", - "matrix", - "Symphony reference", - 
"sparse matrix", - "list", - "data.frame", - "Seurat object", - "" - ] + "TargetPanelSynID": { + "not": { + "type": "null" + } } }, "required": [ - "RObjectClass" + "TargetPanelSynID" ] } }, { "if": { "properties": { - "assay": { + "FileFormat": { "enum": [ - "feature barcode sequencing" + "Rds" ] } - }, - "required": [ - "assay" - ] + } }, "then": { "properties": { - "targetPanelSize": {} + "RObjectClass": { + "not": { + "type": "null" + } + } }, "required": [ - "targetPanelSize" + "RObjectClass" ] } }, { "if": { "properties": { - "assay": { + "ResourceType": { "enum": [ - "feature barcode sequencing" + "Experimentaldata" ] } - }, - "required": [ - "assay" - ] + } }, "then": { "properties": { - "targetPanelSynID": { + "ProcessedDataType": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "targetPanelSynID" + "ProcessedDataType" ] } }, { "if": { "properties": { - "assay": { + "ResourceType": { "enum": [ - "feature barcode sequencing" + "Metadata" ] } - }, - "required": [ - "assay" - ] + } }, "then": { "properties": { - "targetPanel": { + "MetadataType": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "targetPanel" + "MetadataType" ] } }, { "if": { "properties": { - "resourceType": { + "SpecimenModality": { "enum": [ - "metadata" + "Singlespecimen" ] } - }, - "required": [ - "resourceType" - ] + } }, "then": { "properties": { - "metadataType": { - "enum": [ - "single-cell metadata" - ] + "BiospecimenID": { + "not": { + "type": "null" + } } }, "required": [ - "metadataType" + "BiospecimenID" ] } }, { "if": { "properties": { - "resourceType": { + "SpecimenModality": { "enum": [ - "experimental data" + "Singlespecimen" ] } - }, - "required": [ - "resourceType" - ] + } }, "then": { "properties": { - "processedDataType": { - "type": "array", - "items": { - "enum": [ - "barcode counts", - "gene counts", - "differential expression results", - "epigenomic peaks", - "" - ] - }, - "maxItems": 4 + "IndividualID": { + "not": { + 
"type": "null" + } } }, "required": [ - "processedDataType" + "IndividualID" ] } } - ] + ], + "description": "A data contributor template outlining metadata to be collected as file annotations for scVDJ-seq (i.e., immune repertoire profiling) processed data files (i.e., anything not a fastq file).", + "properties": { + "Assay": { + "description": "The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.", + "items": { + "enum": [ + "ASAPSeq", + "CE-MS", + "CITESeq", + "CosMX", + "CyTOF", + "GenePS SeqFISH", + "H&E", + "LC-MS/MS", + "NULISA", + "Olink Explore HT", + "Olink Flex", + "Olink Focus", + "Olink Reveal", + "Olink Target 48", + "Olink Target 96", + "RNASeq", + "SNP array", + "SomaScan", + "VDJSeq", + "Visium", + "WES", + "WGS", + "Xenium", + "feature barcode sequencing", + "flow cytometry", + "imaging mass cytometry", + "imaging mass spectrometry", + "kiloplex", + "multiplexed ELISA", + "scRNASeq", + "scVDJSeq", + "serial IHC", + "snATACSeq", + "snRNASeq" + ], + "type": "string" + }, + "title": "assay", + "type": "array" + }, + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.", + "items": { + "type": "string" + }, + "title": "biospecimenID", + "type": "array" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "DataLevel": { + "description": "Level of data processing applied to file. 
Levels refer to pre-defined standards of processing for the given assay.", + "enum": [ + "1", + "2", + "3", + "4", + "5" + ], + "title": "dataLevel" + }, + "FileFormat": { + "description": "Standard file format name or file extension", + "enum": [ + "bai", + "bam", + "bed", + "bim", + "csv", + "fam", + "h5", + "h5ad", + "mtx", + "rds", + "tgz", + "tsv", + "txt", + "xls", + "xlsx", + "zip" + ], + "title": "fileFormat" + }, + "IndividualID": { + "description": "Unique identifier assigned to each study participant. For multi-specimen data files provide all IDs in a comma-separated list.", + "items": { + "type": "string" + }, + "title": "individualID", + "type": "array" + }, + "MetadataType": { + "description": "A label further classifying the content of metadata resource.", + "enum": [ + "single-cell metadata" + ], + "title": "metadataType" + }, + "ProcessedDataType": { + "description": "A label used for file annotations to provide a brief description of the processed data file.", + "items": { + "enum": [ + "barcode counts", + "differential expression results", + "epigenomic peaks", + "gene counts" + ], + "type": "string" + }, + "title": "processedDataType", + "type": "array" + }, + "RObjectClass": { + "description": "Rds files store R objects, one per file. 
This label details the class of the R object saved to the Rds file or other similar file types.", + "enum": [ + "ROCR prediction.object", + "Seurat object", + "SummarizedExperiment", + "Symphony reference", + "data.frame", + "list", + "matrix", + "sparse matrix", + "vector" + ], + "title": "RObjectClass" + }, + "ResourceType": { + "description": "High-level classification of the file content", + "items": { + "enum": [ + "experimental data", + "metadata" + ], + "type": "string" + }, + "title": "resourceType", + "type": "array" + }, + "SpecimenModality": { + "description": "Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens", + "enum": [ + "multispecimen", + "single specimen", + "unknown" + ], + "title": "specimenModality" + }, + "TargetPanel": { + "description": "A unique or established human-readable name assigned to the panel of targets profiled in the experiment. For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.", + "title": "targetPanel", + "type": "string" + }, + "TargetPanelSize": { + "description": "The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).", + "title": "targetPanelSize", + "type": "integer" + }, + "TargetPanelSynID": { + "description": "In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. 
This attribute links experimental data files to the target panel metadata via the synapse ID of that file.", + "pattern": "^syn[0-9]{8}", + "title": "targetPanelSynID", + "type": "string" + } + }, + "required": [ + "Assay", + "Component", + "DataLevel", + "FileFormat", + "ResourceType", + "SpecimenModality" + ], + "title": "ScVDJSeqProcessedDataAnnotationTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.SnATAC-seqAssayMetadataTemplate.schema.json b/model_json_schema/ark.SnATAC-seqAssayMetadataTemplate.schema.json index 857ff394..0b4644f8 100644 --- a/model_json_schema/ark.SnATAC-seqAssayMetadataTemplate.schema.json +++ b/model_json_schema/ark.SnATAC-seqAssayMetadataTemplate.schema.json @@ -1,345 +1,368 @@ { + "$id": "http://example.com/SnATAC-seqAssayMetadataTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", - "properties": { - "assay": { - "type": "array", - "items": { - "enum": [ - "kiloplex", - "flow cytometry", - "RNASeq", - "GenePS SeqFISH", - "Olink Target 48", - "SNP array", - "Olink Flex", - "LC-MS/MS", - "CosMX", - "serial IHC", - "snRNASeq", - "imaging mass spectrometry", - "feature barcode sequencing", - "Olink Explore HT", - "H&E", - "Visium", - "WGS", - "scVDJSeq", - "Olink Target 96", - "NULISA", - "CE-MS", - "CITESeq", - "ASAPSeq", - "SomaScan", - "CyTOF", - "Olink Reveal", - "multiplexed ELISA", - "Xenium", - "Olink Focus", - "imaging mass cytometry", - "WES", - "scRNASeq", - "snATACSeq", - "VDJSeq" - ] - }, - "maxItems": 34 - }, - "nucleicAcidSource": { - "type": "array", - "items": { - "enum": [ - "CRISPR protospacer feature barcode", - "globin-depleted RNA", - "Tn5-accessible gDNA", - "intracellular protein feature barcode", - "BCR mRNA", - "poly(A) RNA", - "surface protein feature barcode", - "gDNA", - "rRNA-depleted RNA", - "multiplexing oligo", - "TCR mRNA", - "antigen capture barcode" - ] - }, - "maxItems": 12 - 
}, - "specimenModality": { - "enum": [ - "multispecimen", - "unknown", - "single specimen" - ] - }, - "dataCollectionBatch": {}, - "softwareAndVersion": { - "enum": [ - "Cell Ranger v8.0.0", - "Cell Ranger 9.0.1", - "Cell Ranger ATAC v1.1.0", - "Cell Ranger v3.1.0", - "Space Ranger 3.1.2", - "Cell Ranger v5.0.0", - "Cell Ranger v6.1.2", - "Cell Ranger v9.0.0", - "Cell Ranger v5.0.1", - "Cell Ranger v4.0.0", - "Cell Ranger v7.1.0", - "Space Ranger 3.0.0", - "Cell Ranger v6.1.0", - "Space Ranger 3.1.1", - "demuxlet", - "Cell Ranger v8.0.1", - "Cell Ranger v7.0.0", - "Space Ranger 3.0.1", - "Space Ranger 3.1.0", - "Cell Ranger v6.0.1", - "Cell Ranger v6.1.1", - "Cell Ranger v3.0.0", - "Cell Ranger v7.0.1", - "Cell Ranger v3.0.1", - "Cell Ranger v3.0.2", - "Space Ranger 3.1.3", - "BD FACSDiva 8.0.1", - "Cell Ranger v6.0.2", - "Cell Ranger v6.0.0", - "Cell Ranger v7.2.0", - "" - ] - }, - "alignmentReference": { - "enum": [ - "10x Cell Ranger Human GRCh38 2020-A", - "GRCh38", - "10x Cell Ranger Human GRCh38 2024-A", - "vdj_GRCh38_alts_ensembl-4.0.0", - "unknown", - "modified GRCh38" - ] - }, - "totalReads": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "percentCellViability": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "platform": { - "type": "array", - "items": { - "enum": [ - "BD FACSDiscover S8", - "Chromium Xo", - "Visium CytAssist", - "Illumina NovaSeq 6000", - "GEM-X Flex Gene Expression Chip", - "Cytek Aurora Evo", - "none", - "Chromium X", - "BD FACSAria III", - "Olink Signature Q100", - "Hyperion", - "Chromium Controller", - "Illumina HiSeq X Ten", - "Chromium Next GEM Chip G", - "Illumina NextSeq 500", - "Chromium Next GEM Chip H", - "BD FACSMelody", - "BD FACSDiscover A8", - "Not Applicable", - "Chromium Next GEM Chip K", - "Chromium GEM-X Single Cell 3' Chip v4", - "Cytek Aurora", - "BD FACSymphony S6", - "BD LSRFortessa", - "Sony MA900", - "Thermo Fisher Attune CytPix", - "Xenium", - "Helios Mass Cytometer", - "CyTOF XT", - 
"Fluidigm BioMark", - "Chromium Next GEM Chip M", - "Thermo Fisher Attune Xenith", - "Illumina HiSeq 2500", - "Chromium iX", - "Illumina NovaSeq X", - "BD FACSCanto II", - "BD FACSCanto", - "unknown", - "BD FACSLyric Clinical", - "BD FACSAria Fusion cell sorter", - "Chromium Next GEM Chip Q", - "GEM-X OCM 5' Chip", - "Thermo Fisher Attune NxT" - ] - }, - "maxItems": 43 - }, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "sequencingSaturation": {}, - "sampleProcessingBatch": {}, - "inputCellCount": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "libraryPrepMethod": { - "enum": [ - "Takara Human TCR profiling for Illumina", - "SMART-Seq Human TCR with UMI", - "10x Chromium GEM-X Single Cell 3' v4", - "Takara Human scTCR profiling for Illumina", - "SMART-Seq v4 Ultra Low Input RNA", - "10x Chromium Next GEM Single Cell 5' v2", - "10x Chromium Next GEM Single Cell 3' 3.1", - "Takara Human BCR profiling for Illumina", - "10x GEM-X Flex Gene Expression Human", - "QIAseq miRNA Library", - "10x Chromium Next GEM Single Cell 5' v1.1", - "Chromium Next GEM Single Cell ATAC v1.1", - "SMARTer Stranded Total RNA v2", - "Takara Human TCRv2 profiling for Illumina", - "10x Chromium Next GEM Single Cell 3'", - "NEBNext Human Immune Sequencing Kit", - "CEL-Seq2", - "10x Chromium Fixed RNA Human Transcriptome", - "10x Chromium Single Cell Human BCR", - "10x Chromium GEM-X Single Cell 5' v3", - "in-house library prep", - "custom DASH-treatment", - "Fluidigm C1 HT", - "TruSeq Stranded mRNA", - "NEBNext Ultra II Directional RNA Library", - "10x GEM-X Universal 5' Gene Expression v3", - "SMART-Seq Human BCR with UMI", - "Nextera XT DNA", - "10x Chromium Single Cell Human TCR", - "Nextera XT", - "10x Chromium Next GEM Single Cell ATAC v2" - ] - }, - "libraryID": {}, - "biospecimenID": {}, - "10xProbeSetReference": { - "enum": [ - "Flex Human Transcriptome Probe Set v1.0.1", - "Visium Human Transcriptome v1", - "Flex Human Transcriptome Probe Set 
v1.1.0", - "Visium Human Transcriptome v2", - "custom probe set", - "" - ] - } - }, - "required": [ - "assay", - "nucleicAcidSource", - "specimenModality", - "alignmentReference", - "totalReads", - "percentCellViability", - "platform", - "Component", - "inputCellCount", - "libraryPrepMethod" - ], "allOf": [ { "if": { "properties": { - "specimenModality": { + "LibraryPrepMethod": { "enum": [ - "multispecimen" + "10xGEM-XFlexGeneExpressionHuman" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "libraryID": { + "10xProbeSetReference": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "libraryID" + "10xProbeSetReference" ] } }, { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "single specimen" + "Multispecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "biospecimenID": { + "LibraryID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "biospecimenID" + "LibraryID" ] } }, { "if": { "properties": { - "libraryPrepMethod": { + "SpecimenModality": { "enum": [ - "10x GEM-X Flex Gene Expression Human" + "Singlespecimen" ] } - }, - "required": [ - "libraryPrepMethod" - ] + } }, "then": { "properties": { - "10xProbeSetReference": { - "enum": [ - "Flex Human Transcriptome Probe Set v1.0.1", - "Visium Human Transcriptome v1", - "Flex Human Transcriptome Probe Set v1.1.0", - "Visium Human Transcriptome v2", - "custom probe set" - ] + "BiospecimenID": { + "not": { + "type": "null" + } } }, "required": [ - "10xProbeSetReference" + "BiospecimenID" ] } } - ] + ], + "description": "A template outlining metadata to be collected for each library in a snATAC-seq dataset.", + "properties": { + "10xProbeSetReference": { + "description": "Name of probe set used in 10x Chromium Flex, Xenium, or Visium data. 
If custom modified probe set was used the probe reference should be included as metadata accompanying the experimental data files.", + "enum": [ + "Flex Human Transcriptome Probe Set v1.0.1", + "Flex Human Transcriptome Probe Set v1.1.0", + "Visium Human Transcriptome v1", + "Visium Human Transcriptome v2", + "custom probe set" + ], + "title": "10xProbeSetReference" + }, + "AlignmentReference": { + "description": "The genomic/transcriptomic reference used for performing read alignment against.", + "enum": [ + "10x Cell Ranger Human GRCh38 2020-A", + "10x Cell Ranger Human GRCh38 2024-A", + "GRCh38", + "modified GRCh38", + "unknown", + "vdj_GRCh38_alts_ensembl-4.0.0" + ], + "title": "alignmentReference" + }, + "Assay": { + "description": "The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.", + "items": { + "enum": [ + "ASAPSeq", + "CE-MS", + "CITESeq", + "CosMX", + "CyTOF", + "GenePS SeqFISH", + "H&E", + "LC-MS/MS", + "NULISA", + "Olink Explore HT", + "Olink Flex", + "Olink Focus", + "Olink Reveal", + "Olink Target 48", + "Olink Target 96", + "RNASeq", + "SNP array", + "SomaScan", + "VDJSeq", + "Visium", + "WES", + "WGS", + "Xenium", + "feature barcode sequencing", + "flow cytometry", + "imaging mass cytometry", + "imaging mass spectrometry", + "kiloplex", + "multiplexed ELISA", + "scRNASeq", + "scVDJSeq", + "serial IHC", + "snATACSeq", + "snRNASeq" + ], + "type": "string" + }, + "title": "assay", + "type": "array" + }, + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. 
For multi-specimen data files provide all IDs in a comma-separated list.", + "items": { + "type": "string" + }, + "title": "biospecimenID", + "type": "array" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "DataCollectionBatch": { + "description": "A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.", + "title": "dataCollectionBatch", + "type": "string" + }, + "InputCellCount": { + "description": "An estimate of the number of cells expected to be sequenced in a library. Software that process single-cell sequencing data often include options for users to specify this value to improve processing results.", + "title": "inputCellCount", + "type": "integer" + }, + "LibraryID": { + "description": "A library label or name, unique within an experiment, used to distinguish sequencing libraries.", + "title": "libraryID", + "type": "string" + }, + "LibraryPrepMethod": { + "description": "Sequencing library preparation method or kit used to create the library. 
If no commercially available kit was used, please select 'in-house library prep'.", + "enum": [ + "10x Chromium Fixed RNA Human Transcriptome", + "10x Chromium GEM-X Single Cell 3' v4", + "10x Chromium GEM-X Single Cell 5' v3", + "10x Chromium Next GEM Single Cell 3'", + "10x Chromium Next GEM Single Cell 3' 3.1", + "10x Chromium Next GEM Single Cell 5' v1.1", + "10x Chromium Next GEM Single Cell 5' v2", + "10x Chromium Next GEM Single Cell ATAC v2", + "10x Chromium Single Cell Human BCR", + "10x Chromium Single Cell Human TCR", + "10x GEM-X Flex Gene Expression Human", + "10x GEM-X Universal 5' Gene Expression v3", + "CEL-Seq2", + "Chromium Next GEM Single Cell ATAC v1.1", + "Fluidigm C1 HT", + "NEBNext Human Immune Sequencing Kit", + "NEBNext Ultra II Directional RNA Library", + "Nextera XT", + "Nextera XT DNA", + "QIAseq miRNA Library", + "SMART-Seq Human BCR with UMI", + "SMART-Seq Human TCR with UMI", + "SMART-Seq v4 Ultra Low Input RNA", + "SMARTer Stranded Total RNA v2", + "Takara Human BCR profiling for Illumina", + "Takara Human TCR profiling for Illumina", + "Takara Human TCRv2 profiling for Illumina", + "Takara Human scTCR profiling for Illumina", + "TruSeq Stranded mRNA", + "custom DASH-treatment", + "in-house library prep" + ], + "title": "libraryPrepMethod" + }, + "NucleicAcidSource": { + "description": "The source of the nucleic acid used as input for sequencing library fragments. 
Select all that apply, though in most cases only a single label is expected.", + "items": { + "enum": [ + "BCR mRNA", + "CRISPR protospacer feature barcode", + "TCR mRNA", + "Tn5-accessible gDNA", + "antigen capture barcode", + "gDNA", + "globin-depleted RNA", + "intracellular protein feature barcode", + "multiplexing oligo", + "poly(A) RNA", + "rRNA-depleted RNA", + "surface protein feature barcode" + ], + "type": "string" + }, + "title": "nucleicAcidSource", + "type": "array" + }, + "PercentCellViability": { + "description": "A measure of the proportion of viable cells within a cell suspension. Scale is 0-100.", + "maximum": 100.0, + "minimum": 50.0, + "title": "percentCellViability", + "type": "integer" + }, + "Platform": { + "description": "The specific version (manufacturer, model, etc.) of a technology that is used to carry out a laboratory or computational experiment. Specify where applicable for experimental data files, else enter 'none'. In most cases only a single label is expected, however multiple selections can be provided in comma-delimited list where applicable e.g., for 10x Genomics fastq files please specify both the 10x instrument and the sequencing platform.", + "items": { + "enum": [ + "BD FACSAria Fusion cell sorter", + "BD FACSAria III", + "BD FACSCanto", + "BD FACSCanto II", + "BD FACSDiscover A8", + "BD FACSDiscover S8", + "BD FACSLyric Clinical", + "BD FACSMelody", + "BD FACSymphony S6", + "BD LSRFortessa", + "Chromium Controller", + "Chromium GEM-X Single Cell 3' Chip v4", + "Chromium Next GEM Chip G", + "Chromium Next GEM Chip H", + "Chromium Next GEM Chip K", + "Chromium Next GEM Chip M", + "Chromium Next GEM Chip Q", + "Chromium X", + "Chromium Xo", + "Chromium iX", + "CyTOF XT", + "Cytek Aurora", + "Cytek Aurora Evo", + "Fluidigm BioMark", + "GEM-X Flex Gene Expression Chip", + "GEM-X OCM 5' Chip", + "Helios Mass Cytometer", + "Hyperion", + "Illumina HiSeq 2500", + "Illumina HiSeq X Ten", + "Illumina NextSeq 500", + "Illumina NovaSeq 
6000", + "Illumina NovaSeq X", + "Not Applicable", + "Olink Signature Q100", + "Sony MA900", + "Thermo Fisher Attune CytPix", + "Thermo Fisher Attune NxT", + "Thermo Fisher Attune Xenith", + "Visium CytAssist", + "Xenium", + "none", + "unknown" + ], + "type": "string" + }, + "title": "platform", + "type": "array" + }, + "SampleProcessingBatch": { + "description": "A label indicating batching of sample processing or preparation that occurs prior to data collection.", + "title": "sampleProcessingBatch", + "type": "string" + }, + "SequencingSaturation": { + "description": "A measure of the fraction of library complexity that was sequenced in a library. This metric quantifies the fraction of reads originating from an already-observed UMI. More specifically, this is the fraction of confidently mapped, valid cell-barcode, valid UMI reads that are non-unique. Scale is 0-1.", + "maximum": 1.0, + "minimum": 0.0, + "title": "sequencingSaturation", + "type": "number" + }, + "SoftwareAndVersion": { + "description": "Relevant software and version used to generate the data file.", + "enum": [ + "BD FACSDiva 8.0.1", + "Cell Ranger 9.0.1", + "Cell Ranger ATAC v1.1.0", + "Cell Ranger v3.0.0", + "Cell Ranger v3.0.1", + "Cell Ranger v3.0.2", + "Cell Ranger v3.1.0", + "Cell Ranger v4.0.0", + "Cell Ranger v5.0.0", + "Cell Ranger v5.0.1", + "Cell Ranger v6.0.0", + "Cell Ranger v6.0.1", + "Cell Ranger v6.0.2", + "Cell Ranger v6.1.0", + "Cell Ranger v6.1.1", + "Cell Ranger v6.1.2", + "Cell Ranger v7.0.0", + "Cell Ranger v7.0.1", + "Cell Ranger v7.1.0", + "Cell Ranger v7.2.0", + "Cell Ranger v8.0.0", + "Cell Ranger v8.0.1", + "Cell Ranger v9.0.0", + "Space Ranger 3.0.0", + "Space Ranger 3.0.1", + "Space Ranger 3.1.0", + "Space Ranger 3.1.1", + "Space Ranger 3.1.2", + "Space Ranger 3.1.3", + "demuxlet" + ], + "title": "softwareAndVersion" + }, + "SpecimenModality": { + "description": "Label assigned to experimental data files indicating whether the data contained corresponds to a single or 
multiple biospecimens", + "enum": [ + "multispecimen", + "single specimen", + "unknown" + ], + "title": "specimenModality" + }, + "TotalReads": { + "description": "Total number of reads sequenced from the library.", + "title": "totalReads", + "type": "integer" + } + }, + "required": [ + "AlignmentReference", + "Assay", + "Component", + "InputCellCount", + "LibraryPrepMethod", + "NucleicAcidSource", + "PercentCellViability", + "Platform", + "SpecimenModality", + "TotalReads" + ], + "title": "SnATAC-seqAssayMetadataTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.SnATACSeqProcessedDataAnnotationTemplate.schema.json b/model_json_schema/ark.SnATACSeqProcessedDataAnnotationTemplate.schema.json index c956df17..1d027419 100644 --- a/model_json_schema/ark.SnATACSeqProcessedDataAnnotationTemplate.schema.json +++ b/model_json_schema/ark.SnATACSeqProcessedDataAnnotationTemplate.schema.json @@ -1,387 +1,388 @@ { + "$id": "http://example.com/SnATACSeqProcessedDataAnnotationTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", - "properties": { - "assay": { - "type": "array", - "items": { - "enum": [ - "CosMX", - "Olink Flex", - "CyTOF", - "snRNASeq", - "kiloplex", - "Olink Target 48", - "serial IHC", - "Xenium", - "Olink Focus", - "CITESeq", - "flow cytometry", - "NULISA", - "scVDJSeq", - "Visium", - "Olink Target 96", - "ASAPSeq", - "scRNASeq", - "RNASeq", - "snATACSeq", - "feature barcode sequencing", - "CE-MS", - "WES", - "SNP array", - "multiplexed ELISA", - "VDJSeq", - "imaging mass cytometry", - "H&E", - "WGS", - "LC-MS/MS", - "imaging mass spectrometry", - "Olink Explore HT", - "Olink Reveal", - "SomaScan", - "GenePS SeqFISH" - ] - }, - "maxItems": 34 - }, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "resourceType": { - "type": "array", - "items": { - "enum": [ - "metadata", - "experimental data" - ] - }, - 
"maxItems": 2 - }, - "specimenModality": { - "enum": [ - "unknown", - "multispecimen", - "single specimen" - ] - }, - "fileFormat": { - "enum": [ - "xls", - "Rds", - "bim", - "h5", - "h5ad", - "bai", - "zip", - "fam", - "tsv", - "csv", - "xlsx", - "bam", - "tgz", - "bed", - "mtx", - "txt" - ] - }, - "dataLevel": { - "enum": [ - "1", - "3", - "4", - "2", - "5" - ] - }, - "cellRangerOutput": { - "enum": [ - "filtered MEX", - "filtered_feature_bc_matrix", - "raw_feature_bc_matrix", - "Not Applicable", - "filtered_peak_bc_matrix", - "raw MEX", - "raw_peak_bc_matrix" - ] - }, - "targetPanelSize": {}, - "targetPanelSynID": {}, - "targetPanel": {}, - "metadataType": { - "enum": [ - "single-cell metadata", - "" - ] - }, - "processedDataType": { - "type": "array", - "items": { - "enum": [ - "gene counts", - "epigenomic peaks", - "differential expression results", - "barcode counts", - "" - ] - }, - "maxItems": 4 - }, - "individualID": {}, - "biospecimenID": {}, - "RObjectClass": { - "enum": [ - "vector", - "Symphony reference", - "Seurat object", - "list", - "sparse matrix", - "SummarizedExperiment", - "data.frame", - "matrix", - "ROCR prediction.object", - "" - ] - } - }, - "required": [ - "assay", - "Component", - "resourceType", - "specimenModality", - "fileFormat", - "dataLevel", - "cellRangerOutput" - ], "allOf": [ { "if": { "properties": { - "assay": { + "Assay": { "enum": [ - "feature barcode sequencing" + "Featurebarcodesequencing" ] } - }, - "required": [ - "assay" - ] + } }, "then": { "properties": { - "targetPanelSize": {} + "TargetPanel": { + "not": { + "type": "null" + } + } }, "required": [ - "targetPanelSize" + "TargetPanel" ] } }, { "if": { "properties": { - "assay": { + "Assay": { "enum": [ - "feature barcode sequencing" + "Featurebarcodesequencing" ] } - }, - "required": [ - "assay" - ] + } }, "then": { "properties": { - "targetPanelSynID": { + "TargetPanelSize": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "targetPanelSynID" + 
"TargetPanelSize" ] } }, { "if": { "properties": { - "assay": { + "Assay": { "enum": [ - "feature barcode sequencing" + "Featurebarcodesequencing" ] } - }, - "required": [ - "assay" - ] + } }, "then": { "properties": { - "targetPanel": { + "TargetPanelSynID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "targetPanel" + "TargetPanelSynID" ] } }, { "if": { "properties": { - "resourceType": { + "FileFormat": { "enum": [ - "metadata" + "Rds" ] } - }, - "required": [ - "resourceType" - ] + } }, "then": { "properties": { - "metadataType": { - "enum": [ - "single-cell metadata" - ] + "RObjectClass": { + "not": { + "type": "null" + } } }, "required": [ - "metadataType" + "RObjectClass" ] } }, { "if": { "properties": { - "resourceType": { + "ResourceType": { "enum": [ - "experimental data" + "Experimentaldata" ] } - }, - "required": [ - "resourceType" - ] + } }, "then": { "properties": { - "processedDataType": { - "type": "array", - "items": { - "enum": [ - "gene counts", - "epigenomic peaks", - "differential expression results", - "barcode counts", - "" - ] - }, - "maxItems": 4 + "ProcessedDataType": { + "not": { + "type": "null" + } } }, "required": [ - "processedDataType" + "ProcessedDataType" ] } }, { "if": { "properties": { - "specimenModality": { + "ResourceType": { "enum": [ - "single specimen" + "Metadata" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "individualID": { + "MetadataType": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "individualID" + "MetadataType" ] } }, { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "single specimen" + "Singlespecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "biospecimenID": { + "BiospecimenID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "biospecimenID" + "BiospecimenID" ] } }, { "if": { "properties": { - "fileFormat": { + 
"SpecimenModality": { "enum": [ - "Rds" + "Singlespecimen" ] } - }, - "required": [ - "fileFormat" - ] + } }, "then": { "properties": { - "RObjectClass": { - "enum": [ - "vector", - "Symphony reference", - "Seurat object", - "list", - "sparse matrix", - "SummarizedExperiment", - "data.frame", - "matrix", - "ROCR prediction.object", - "" - ] + "IndividualID": { + "not": { + "type": "null" + } } }, "required": [ - "RObjectClass" + "IndividualID" ] } } - ] + ], + "description": "A data contributor template outlining metadata to be collected as file annotations for snATAC-seq processed data files (i.e., anything not a fastq file).", + "properties": { + "Assay": { + "description": "The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.", + "items": { + "enum": [ + "ASAPSeq", + "CE-MS", + "CITESeq", + "CosMX", + "CyTOF", + "GenePS SeqFISH", + "H&E", + "LC-MS/MS", + "NULISA", + "Olink Explore HT", + "Olink Flex", + "Olink Focus", + "Olink Reveal", + "Olink Target 48", + "Olink Target 96", + "RNASeq", + "SNP array", + "SomaScan", + "VDJSeq", + "Visium", + "WES", + "WGS", + "Xenium", + "feature barcode sequencing", + "flow cytometry", + "imaging mass cytometry", + "imaging mass spectrometry", + "kiloplex", + "multiplexed ELISA", + "scRNASeq", + "scVDJSeq", + "serial IHC", + "snATACSeq", + "snRNASeq" + ], + "type": "string" + }, + "title": "assay", + "type": "array" + }, + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. 
For multi-specimen data files provide all IDs in a comma-separated list.", + "items": { + "type": "string" + }, + "title": "biospecimenID", + "type": "array" + }, + "CellRangerOutput": { + "description": "10x Genomics Cell Ranger software output several different counts results and formats, some with different processing applied. This label distinguishes between these types and is particularly helpful when multiple files are uploaded with the sample name, e.g., barcodes.tsv.gz", + "enum": [ + "Not Applicable", + "filtered MEX", + "filtered_feature_bc_matrix", + "filtered_peak_bc_matrix", + "raw MEX", + "raw_feature_bc_matrix", + "raw_peak_bc_matrix" + ], + "title": "cellRangerOutput" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "DataLevel": { + "description": "Level of data processing applied to file. Levels refer to pre-defined standards of processing for the given assay.", + "enum": [ + "1", + "2", + "3", + "4", + "5" + ], + "title": "dataLevel" + }, + "FileFormat": { + "description": "Standard file format name or file extension", + "enum": [ + "bai", + "bam", + "bed", + "bim", + "csv", + "fam", + "h5", + "h5ad", + "mtx", + "rds", + "tgz", + "tsv", + "txt", + "xls", + "xlsx", + "zip" + ], + "title": "fileFormat" + }, + "IndividualID": { + "description": "Unique identifier assigned to each study participant. 
For multi-specimen data files provide all IDs in a comma-separated list.", + "items": { + "type": "string" + }, + "title": "individualID", + "type": "array" + }, + "MetadataType": { + "description": "A label further classifying the content of metadata resource.", + "enum": [ + "single-cell metadata" + ], + "title": "metadataType" + }, + "ProcessedDataType": { + "description": "A label used for file annotations to provide a brief description of the processed data file.", + "items": { + "enum": [ + "barcode counts", + "differential expression results", + "epigenomic peaks", + "gene counts" + ], + "type": "string" + }, + "title": "processedDataType", + "type": "array" + }, + "RObjectClass": { + "description": "Rds files store R objects, one per file. This label details the class of the R object saved to the Rds file or other similar file types.", + "enum": [ + "ROCR prediction.object", + "Seurat object", + "SummarizedExperiment", + "Symphony reference", + "data.frame", + "list", + "matrix", + "sparse matrix", + "vector" + ], + "title": "RObjectClass" + }, + "ResourceType": { + "description": "High-level classification of the file content", + "items": { + "enum": [ + "experimental data", + "metadata" + ], + "type": "string" + }, + "title": "resourceType", + "type": "array" + }, + "SpecimenModality": { + "description": "Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens", + "enum": [ + "multispecimen", + "single specimen", + "unknown" + ], + "title": "specimenModality" + }, + "TargetPanel": { + "description": "A unique or established human-readable name assigned to the panel of targets profiled in the experiment. 
For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.", + "title": "targetPanel", + "type": "string" + }, + "TargetPanelSize": { + "description": "The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).", + "title": "targetPanelSize", + "type": "integer" + }, + "TargetPanelSynID": { + "description": "In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. This attribute links experimental data files to the target panel metadata via the synapse ID of that file.", + "pattern": "^syn[0-9]{8}", + "title": "targetPanelSynID", + "type": "string" + } + }, + "required": [ + "Assay", + "CellRangerOutput", + "Component", + "DataLevel", + "FileFormat", + "ResourceType", + "SpecimenModality" + ], + "title": "SnATACSeqProcessedDataAnnotationTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.SnRNASeqAssayMetadataTemplate.schema.json b/model_json_schema/ark.SnRNASeqAssayMetadataTemplate.schema.json index 95dfac5a..0926b1c0 100644 --- a/model_json_schema/ark.SnRNASeqAssayMetadataTemplate.schema.json +++ b/model_json_schema/ark.SnRNASeqAssayMetadataTemplate.schema.json @@ -1,285 +1,23 @@ { + "$id": "http://example.com/SnRNASeqAssayMetadataTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", - "properties": { - "dataCollectionBatch": {}, - "percentCellViability": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "softwareAndVersion": { - "enum": [ - "Cell Ranger v9.0.0", 
- "Cell Ranger v6.0.1", - "Cell Ranger v7.1.0", - "Cell Ranger ATAC v1.1.0", - "Cell Ranger v6.0.2", - "Space Ranger 3.1.1", - "Cell Ranger v6.1.1", - "Cell Ranger v7.0.1", - "Cell Ranger v4.0.0", - "Cell Ranger v3.0.1", - "Space Ranger 3.1.3", - "Cell Ranger v7.0.0", - "Cell Ranger v5.0.0", - "Cell Ranger v5.0.1", - "Cell Ranger v3.0.0", - "Cell Ranger v8.0.1", - "Cell Ranger v7.2.0", - "Cell Ranger v3.0.2", - "Cell Ranger 9.0.1", - "Cell Ranger v8.0.0", - "Cell Ranger v6.1.0", - "Cell Ranger v3.1.0", - "Cell Ranger v6.1.2", - "Cell Ranger v6.0.0", - "Space Ranger 3.0.0", - "Space Ranger 3.1.0", - "Space Ranger 3.1.2", - "Space Ranger 3.0.1", - "demuxlet", - "BD FACSDiva 8.0.1", - "" - ] - }, - "alignmentReference": { - "enum": [ - "10x Cell Ranger Human GRCh38 2020-A", - "GRCh38", - "modified GRCh38", - "10x Cell Ranger Human GRCh38 2024-A", - "vdj_GRCh38_alts_ensembl-4.0.0", - "unknown" - ] - }, - "nucleicAcidSource": { - "type": "array", - "items": { - "enum": [ - "globin-depleted RNA", - "CRISPR protospacer feature barcode", - "poly(A) RNA", - "gDNA", - "antigen capture barcode", - "multiplexing oligo", - "TCR mRNA", - "BCR mRNA", - "surface protein feature barcode", - "rRNA-depleted RNA", - "Tn5-accessible gDNA", - "intracellular protein feature barcode" - ] - }, - "maxItems": 12 - }, - "sampleProcessingBatch": {}, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "platform": { - "type": "array", - "items": { - "enum": [ - "BD FACSAria III", - "Illumina NovaSeq X", - "BD FACSDiscover A8", - "Thermo Fisher Attune NxT", - "Chromium GEM-X Single Cell 3' Chip v4", - "GEM-X Flex Gene Expression Chip", - "BD FACSDiscover S8", - "GEM-X OCM 5' Chip", - "BD FACSAria Fusion cell sorter", - "Thermo Fisher Attune Xenith", - "Not Applicable", - "Olink Signature Q100", - "Cytek Aurora Evo", - "Chromium Next GEM Chip Q", - "Chromium X", - "Chromium Xo", - "BD LSRFortessa", - "CyTOF XT", - "Chromium iX", - "BD FACSCanto", - "Visium CytAssist", - 
"Chromium Next GEM Chip M", - "BD FACSLyric Clinical", - "Chromium Next GEM Chip H", - "Chromium Controller", - "Helios Mass Cytometer", - "BD FACSMelody", - "Fluidigm BioMark", - "Xenium", - "none", - "Illumina NovaSeq 6000", - "Illumina NextSeq 500", - "Sony MA900", - "Hyperion", - "Chromium Next GEM Chip G", - "Thermo Fisher Attune CytPix", - "BD FACSCanto II", - "Illumina HiSeq 2500", - "Illumina HiSeq X Ten", - "BD FACSymphony S6", - "unknown", - "Cytek Aurora", - "Chromium Next GEM Chip K" - ] - }, - "maxItems": 43 - }, - "assay": { - "type": "array", - "items": { - "enum": [ - "imaging mass spectrometry", - "imaging mass cytometry", - "snRNASeq", - "Olink Target 96", - "Olink Explore HT", - "RNASeq", - "Visium", - "scVDJSeq", - "multiplexed ELISA", - "GenePS SeqFISH", - "Olink Target 48", - "SomaScan", - "CITESeq", - "ASAPSeq", - "VDJSeq", - "flow cytometry", - "Olink Flex", - "serial IHC", - "WES", - "WGS", - "LC-MS/MS", - "Olink Reveal", - "Olink Focus", - "H&E", - "Xenium", - "kiloplex", - "SNP array", - "CE-MS", - "feature barcode sequencing", - "NULISA", - "CyTOF", - "CosMX", - "scRNASeq", - "snATACSeq" - ] - }, - "maxItems": 34 - }, - "inputCellCount": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "sequencingSaturation": {}, - "totalReads": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "libraryPrepMethod": { - "enum": [ - "Nextera XT DNA", - "Takara Human scTCR profiling for Illumina", - "10x GEM-X Flex Gene Expression Human", - "SMARTer Stranded Total RNA v2", - "Takara Human TCR profiling for Illumina", - "SMART-Seq Human TCR with UMI", - "Chromium Next GEM Single Cell ATAC v1.1", - "10x Chromium Fixed RNA Human Transcriptome", - "10x GEM-X Universal 5' Gene Expression v3", - "10x Chromium GEM-X Single Cell 5' v3", - "10x Chromium Next GEM Single Cell 3' 3.1", - "NEBNext Ultra II Directional RNA Library", - "SMART-Seq v4 Ultra Low Input RNA", - "SMART-Seq Human BCR with UMI", - "Takara Human BCR profiling for Illumina", - 
"QIAseq miRNA Library", - "10x Chromium Single Cell Human TCR", - "CEL-Seq2", - "NEBNext Human Immune Sequencing Kit", - "10x Chromium Next GEM Single Cell 5' v1.1", - "10x Chromium Next GEM Single Cell 5' v2", - "Fluidigm C1 HT", - "Takara Human TCRv2 profiling for Illumina", - "Nextera XT", - "10x Chromium Next GEM Single Cell ATAC v2", - "10x Chromium GEM-X Single Cell 3' v4", - "TruSeq Stranded mRNA", - "10x Chromium Single Cell Human BCR", - "10x Chromium Next GEM Single Cell 3'", - "custom DASH-treatment", - "in-house library prep" - ] - }, - "specimenModality": { - "enum": [ - "multispecimen", - "unknown", - "single specimen" - ] - }, - "10xProbeSetReference": { - "enum": [ - "custom probe set", - "Flex Human Transcriptome Probe Set v1.0.1", - "Flex Human Transcriptome Probe Set v1.1.0", - "Visium Human Transcriptome v1", - "Visium Human Transcriptome v2", - "" - ] - }, - "libraryID": {}, - "biospecimenID": {} - }, - "required": [ - "percentCellViability", - "alignmentReference", - "nucleicAcidSource", - "Component", - "platform", - "assay", - "inputCellCount", - "totalReads", - "libraryPrepMethod", - "specimenModality" - ], "allOf": [ { "if": { "properties": { - "libraryPrepMethod": { + "LibraryPrepMethod": { "enum": [ - "10x GEM-X Flex Gene Expression Human" + "10xGEM-XFlexGeneExpressionHuman" ] } - }, - "required": [ - "libraryPrepMethod" - ] + } }, "then": { "properties": { "10xProbeSetReference": { - "enum": [ - "custom probe set", - "Flex Human Transcriptome Probe Set v1.0.1", - "Flex Human Transcriptome Probe Set v1.1.0", - "Visium Human Transcriptome v1", - "Visium Human Transcriptome v2" - ] + "not": { + "type": "null" + } } }, "required": [ @@ -290,56 +28,341 @@ { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "multispecimen" + "Multispecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "libraryID": { + "LibraryID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, 
"required": [ - "libraryID" + "LibraryID" ] } }, { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "single specimen" + "Singlespecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "biospecimenID": { + "BiospecimenID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "biospecimenID" + "BiospecimenID" ] } } - ] + ], + "description": "A template outlining metadata to be collected for each library in a snRNA-seq dataset.", + "properties": { + "10xProbeSetReference": { + "description": "Name of probe set used in 10x Chromium Flex, Xenium, or Visium data. If custom modified probe set was used the probe reference should be included as metadata accompanying the experimental data files.", + "enum": [ + "Flex Human Transcriptome Probe Set v1.0.1", + "Flex Human Transcriptome Probe Set v1.1.0", + "Visium Human Transcriptome v1", + "Visium Human Transcriptome v2", + "custom probe set" + ], + "title": "10xProbeSetReference" + }, + "AlignmentReference": { + "description": "The genomic/transcriptomic reference used for performing read alignment against.", + "enum": [ + "10x Cell Ranger Human GRCh38 2020-A", + "10x Cell Ranger Human GRCh38 2024-A", + "GRCh38", + "modified GRCh38", + "unknown", + "vdj_GRCh38_alts_ensembl-4.0.0" + ], + "title": "alignmentReference" + }, + "Assay": { + "description": "The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. 
e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.", + "items": { + "enum": [ + "ASAPSeq", + "CE-MS", + "CITESeq", + "CosMX", + "CyTOF", + "GenePS SeqFISH", + "H&E", + "LC-MS/MS", + "NULISA", + "Olink Explore HT", + "Olink Flex", + "Olink Focus", + "Olink Reveal", + "Olink Target 48", + "Olink Target 96", + "RNASeq", + "SNP array", + "SomaScan", + "VDJSeq", + "Visium", + "WES", + "WGS", + "Xenium", + "feature barcode sequencing", + "flow cytometry", + "imaging mass cytometry", + "imaging mass spectrometry", + "kiloplex", + "multiplexed ELISA", + "scRNASeq", + "scVDJSeq", + "serial IHC", + "snATACSeq", + "snRNASeq" + ], + "type": "string" + }, + "title": "assay", + "type": "array" + }, + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. For multi-specimen data files provide all IDs in a comma-separated list.", + "items": { + "type": "string" + }, + "title": "biospecimenID", + "type": "array" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "DataCollectionBatch": { + "description": "A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.", + "title": "dataCollectionBatch", + "type": "string" + }, + "InputCellCount": { + "description": "An estimate of the number of cells expected to be sequenced in a library. Software that process single-cell sequencing data often include options for users to specify this value to improve processing results.", + "title": "inputCellCount", + "type": "integer" + }, + "LibraryID": { + "description": "A library label or name, unique within an experiment, used to distinguish sequencing libraries.", + "title": "libraryID", + "type": "string" + }, + "LibraryPrepMethod": { + "description": "Sequencing library preparation method or kit used to create the library. 
If no commercially available kit was used, please select 'in-house library prep'.", + "enum": [ + "10x Chromium Fixed RNA Human Transcriptome", + "10x Chromium GEM-X Single Cell 3' v4", + "10x Chromium GEM-X Single Cell 5' v3", + "10x Chromium Next GEM Single Cell 3'", + "10x Chromium Next GEM Single Cell 3' 3.1", + "10x Chromium Next GEM Single Cell 5' v1.1", + "10x Chromium Next GEM Single Cell 5' v2", + "10x Chromium Next GEM Single Cell ATAC v2", + "10x Chromium Single Cell Human BCR", + "10x Chromium Single Cell Human TCR", + "10x GEM-X Flex Gene Expression Human", + "10x GEM-X Universal 5' Gene Expression v3", + "CEL-Seq2", + "Chromium Next GEM Single Cell ATAC v1.1", + "Fluidigm C1 HT", + "NEBNext Human Immune Sequencing Kit", + "NEBNext Ultra II Directional RNA Library", + "Nextera XT", + "Nextera XT DNA", + "QIAseq miRNA Library", + "SMART-Seq Human BCR with UMI", + "SMART-Seq Human TCR with UMI", + "SMART-Seq v4 Ultra Low Input RNA", + "SMARTer Stranded Total RNA v2", + "Takara Human BCR profiling for Illumina", + "Takara Human TCR profiling for Illumina", + "Takara Human TCRv2 profiling for Illumina", + "Takara Human scTCR profiling for Illumina", + "TruSeq Stranded mRNA", + "custom DASH-treatment", + "in-house library prep" + ], + "title": "libraryPrepMethod" + }, + "NucleicAcidSource": { + "description": "The source of the nucleic acid used as input for sequencing library fragments. 
Select all that apply, though in most cases only a single label is expected.", + "items": { + "enum": [ + "BCR mRNA", + "CRISPR protospacer feature barcode", + "TCR mRNA", + "Tn5-accessible gDNA", + "antigen capture barcode", + "gDNA", + "globin-depleted RNA", + "intracellular protein feature barcode", + "multiplexing oligo", + "poly(A) RNA", + "rRNA-depleted RNA", + "surface protein feature barcode" + ], + "type": "string" + }, + "title": "nucleicAcidSource", + "type": "array" + }, + "PercentCellViability": { + "description": "A measure of the proportion of viable cells within a cell suspension. Scale is 0-100.", + "maximum": 100.0, + "minimum": 50.0, + "title": "percentCellViability", + "type": "integer" + }, + "Platform": { + "description": "The specific version (manufacturer, model, etc.) of a technology that is used to carry out a laboratory or computational experiment. Specify where applicable for experimental data files, else enter 'none'. In most cases only a single label is expected, however multiple selections can be provided in comma-delimited list where applicable e.g., for 10x Genomics fastq files please specify both the 10x instrument and the sequencing platform.", + "items": { + "enum": [ + "BD FACSAria Fusion cell sorter", + "BD FACSAria III", + "BD FACSCanto", + "BD FACSCanto II", + "BD FACSDiscover A8", + "BD FACSDiscover S8", + "BD FACSLyric Clinical", + "BD FACSMelody", + "BD FACSymphony S6", + "BD LSRFortessa", + "Chromium Controller", + "Chromium GEM-X Single Cell 3' Chip v4", + "Chromium Next GEM Chip G", + "Chromium Next GEM Chip H", + "Chromium Next GEM Chip K", + "Chromium Next GEM Chip M", + "Chromium Next GEM Chip Q", + "Chromium X", + "Chromium Xo", + "Chromium iX", + "CyTOF XT", + "Cytek Aurora", + "Cytek Aurora Evo", + "Fluidigm BioMark", + "GEM-X Flex Gene Expression Chip", + "GEM-X OCM 5' Chip", + "Helios Mass Cytometer", + "Hyperion", + "Illumina HiSeq 2500", + "Illumina HiSeq X Ten", + "Illumina NextSeq 500", + "Illumina NovaSeq 
6000", + "Illumina NovaSeq X", + "Not Applicable", + "Olink Signature Q100", + "Sony MA900", + "Thermo Fisher Attune CytPix", + "Thermo Fisher Attune NxT", + "Thermo Fisher Attune Xenith", + "Visium CytAssist", + "Xenium", + "none", + "unknown" + ], + "type": "string" + }, + "title": "platform", + "type": "array" + }, + "SampleProcessingBatch": { + "description": "A label indicating batching of sample processing or preparation that occurs prior to data collection.", + "title": "sampleProcessingBatch", + "type": "string" + }, + "SequencingSaturation": { + "description": "A measure of the fraction of library complexity that was sequenced in a library. This metric quantifies the fraction of reads originating from an already-observed UMI. More specifically, this is the fraction of confidently mapped, valid cell-barcode, valid UMI reads that are non-unique. Scale is 0-1.", + "maximum": 1.0, + "minimum": 0.0, + "title": "sequencingSaturation", + "type": "number" + }, + "SoftwareAndVersion": { + "description": "Relevant software and version used to generate the data file.", + "enum": [ + "BD FACSDiva 8.0.1", + "Cell Ranger 9.0.1", + "Cell Ranger ATAC v1.1.0", + "Cell Ranger v3.0.0", + "Cell Ranger v3.0.1", + "Cell Ranger v3.0.2", + "Cell Ranger v3.1.0", + "Cell Ranger v4.0.0", + "Cell Ranger v5.0.0", + "Cell Ranger v5.0.1", + "Cell Ranger v6.0.0", + "Cell Ranger v6.0.1", + "Cell Ranger v6.0.2", + "Cell Ranger v6.1.0", + "Cell Ranger v6.1.1", + "Cell Ranger v6.1.2", + "Cell Ranger v7.0.0", + "Cell Ranger v7.0.1", + "Cell Ranger v7.1.0", + "Cell Ranger v7.2.0", + "Cell Ranger v8.0.0", + "Cell Ranger v8.0.1", + "Cell Ranger v9.0.0", + "Space Ranger 3.0.0", + "Space Ranger 3.0.1", + "Space Ranger 3.1.0", + "Space Ranger 3.1.1", + "Space Ranger 3.1.2", + "Space Ranger 3.1.3", + "demuxlet" + ], + "title": "softwareAndVersion" + }, + "SpecimenModality": { + "description": "Label assigned to experimental data files indicating whether the data contained corresponds to a single or 
multiple biospecimens", + "enum": [ + "multispecimen", + "single specimen", + "unknown" + ], + "title": "specimenModality" + }, + "TotalReads": { + "description": "Total number of reads sequenced from the library.", + "title": "totalReads", + "type": "integer" + } + }, + "required": [ + "AlignmentReference", + "Assay", + "Component", + "InputCellCount", + "LibraryPrepMethod", + "NucleicAcidSource", + "PercentCellViability", + "Platform", + "SpecimenModality", + "TotalReads" + ], + "title": "SnRNASeqAssayMetadataTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.SnRNASeqProcessedDataAnnotationTemplate.schema.json b/model_json_schema/ark.SnRNASeqProcessedDataAnnotationTemplate.schema.json index 4a4e518e..647c7d3f 100644 --- a/model_json_schema/ark.SnRNASeqProcessedDataAnnotationTemplate.schema.json +++ b/model_json_schema/ark.SnRNASeqProcessedDataAnnotationTemplate.schema.json @@ -1,387 +1,388 @@ { + "$id": "http://example.com/SnRNASeqProcessedDataAnnotationTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", - "properties": { - "resourceType": { - "type": "array", - "items": { - "enum": [ - "metadata", - "experimental data" - ] - }, - "maxItems": 2 - }, - "fileFormat": { - "enum": [ - "xls", - "fam", - "bed", - "bai", - "Rds", - "bim", - "tsv", - "h5", - "tgz", - "zip", - "xlsx", - "txt", - "mtx", - "bam", - "csv", - "h5ad" - ] - }, - "specimenModality": { - "enum": [ - "multispecimen", - "unknown", - "single specimen" - ] - }, - "assay": { - "type": "array", - "items": { - "enum": [ - "H&E", - "Olink Focus", - "NULISA", - "CosMX", - "Olink Target 48", - "Olink Target 96", - "CITESeq", - "snATACSeq", - "scRNASeq", - "SNP array", - "snRNASeq", - "RNASeq", - "GenePS SeqFISH", - "SomaScan", - "Xenium", - "Visium", - "Olink Explore HT", - "CyTOF", - "Olink Reveal", - "imaging mass spectrometry", - "serial IHC", - "flow cytometry", - 
"LC-MS/MS", - "feature barcode sequencing", - "imaging mass cytometry", - "kiloplex", - "scVDJSeq", - "multiplexed ELISA", - "CE-MS", - "WES", - "VDJSeq", - "WGS", - "Olink Flex", - "ASAPSeq" - ] - }, - "maxItems": 34 - }, - "cellRangerOutput": { - "enum": [ - "filtered_peak_bc_matrix", - "raw MEX", - "raw_peak_bc_matrix", - "filtered MEX", - "raw_feature_bc_matrix", - "Not Applicable", - "filtered_feature_bc_matrix" - ] - }, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "dataLevel": { - "enum": [ - "1", - "3", - "2", - "4", - "5" - ] - }, - "metadataType": { - "enum": [ - "single-cell metadata", - "" - ] - }, - "processedDataType": { - "type": "array", - "items": { - "enum": [ - "epigenomic peaks", - "barcode counts", - "gene counts", - "differential expression results", - "" - ] - }, - "maxItems": 4 - }, - "RObjectClass": { - "enum": [ - "ROCR prediction.object", - "sparse matrix", - "SummarizedExperiment", - "list", - "matrix", - "Seurat object", - "vector", - "data.frame", - "Symphony reference", - "" - ] - }, - "individualID": {}, - "biospecimenID": {}, - "targetPanelSize": {}, - "targetPanel": {}, - "targetPanelSynID": {} - }, - "required": [ - "resourceType", - "fileFormat", - "specimenModality", - "assay", - "cellRangerOutput", - "Component", - "dataLevel" - ], "allOf": [ { "if": { "properties": { - "resourceType": { + "Assay": { "enum": [ - "metadata" + "Featurebarcodesequencing" ] } - }, - "required": [ - "resourceType" - ] + } }, "then": { "properties": { - "metadataType": { - "enum": [ - "single-cell metadata" - ] + "TargetPanel": { + "not": { + "type": "null" + } } }, "required": [ - "metadataType" + "TargetPanel" ] } }, { "if": { "properties": { - "resourceType": { + "Assay": { "enum": [ - "experimental data" + "Featurebarcodesequencing" ] } - }, - "required": [ - "resourceType" - ] + } }, "then": { "properties": { - "processedDataType": { - "type": "array", - "items": { - "enum": [ - "epigenomic peaks", - "barcode counts", 
- "gene counts", - "differential expression results", - "" - ] - }, - "maxItems": 4 + "TargetPanelSize": { + "not": { + "type": "null" + } } }, "required": [ - "processedDataType" + "TargetPanelSize" ] } }, { "if": { "properties": { - "fileFormat": { + "Assay": { "enum": [ - "Rds" + "Featurebarcodesequencing" ] } - }, - "required": [ - "fileFormat" - ] + } }, "then": { "properties": { - "RObjectClass": { - "enum": [ - "ROCR prediction.object", - "sparse matrix", - "SummarizedExperiment", - "list", - "matrix", - "Seurat object", - "vector", - "data.frame", - "Symphony reference", - "" - ] + "TargetPanelSynID": { + "not": { + "type": "null" + } } }, "required": [ - "RObjectClass" + "TargetPanelSynID" ] } }, { "if": { "properties": { - "specimenModality": { + "FileFormat": { "enum": [ - "single specimen" + "Rds" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "individualID": { + "RObjectClass": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "individualID" + "RObjectClass" ] } }, { "if": { "properties": { - "specimenModality": { + "ResourceType": { "enum": [ - "single specimen" + "Experimentaldata" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "biospecimenID": { + "ProcessedDataType": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "biospecimenID" + "ProcessedDataType" ] } }, { "if": { "properties": { - "assay": { + "ResourceType": { "enum": [ - "feature barcode sequencing" + "Metadata" ] } - }, - "required": [ - "assay" - ] + } }, "then": { "properties": { - "targetPanelSize": {} + "MetadataType": { + "not": { + "type": "null" + } + } }, "required": [ - "targetPanelSize" + "MetadataType" ] } }, { "if": { "properties": { - "assay": { + "SpecimenModality": { "enum": [ - "feature barcode sequencing" + "Singlespecimen" ] } - }, - "required": [ - "assay" - ] + } }, "then": { "properties": { - "targetPanel": { + "BiospecimenID": { "not": { "type": 
"null" - }, - "minLength": 1 + } } }, "required": [ - "targetPanel" + "BiospecimenID" ] } }, { "if": { "properties": { - "assay": { + "SpecimenModality": { "enum": [ - "feature barcode sequencing" + "Singlespecimen" ] } - }, - "required": [ - "assay" - ] + } }, "then": { "properties": { - "targetPanelSynID": { + "IndividualID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "targetPanelSynID" + "IndividualID" ] } } - ] + ], + "description": "A data contributor template outlining metadata to be collected as file annotations for snRNA-seq processed data files (i.e., anything not a fastq file).", + "properties": { + "Assay": { + "description": "The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.", + "items": { + "enum": [ + "ASAPSeq", + "CE-MS", + "CITESeq", + "CosMX", + "CyTOF", + "GenePS SeqFISH", + "H&E", + "LC-MS/MS", + "NULISA", + "Olink Explore HT", + "Olink Flex", + "Olink Focus", + "Olink Reveal", + "Olink Target 48", + "Olink Target 96", + "RNASeq", + "SNP array", + "SomaScan", + "VDJSeq", + "Visium", + "WES", + "WGS", + "Xenium", + "feature barcode sequencing", + "flow cytometry", + "imaging mass cytometry", + "imaging mass spectrometry", + "kiloplex", + "multiplexed ELISA", + "scRNASeq", + "scVDJSeq", + "serial IHC", + "snATACSeq", + "snRNASeq" + ], + "type": "string" + }, + "title": "assay", + "type": "array" + }, + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. 
For multi-specimen data files provide all IDs in a comma-separated list.", + "items": { + "type": "string" + }, + "title": "biospecimenID", + "type": "array" + }, + "CellRangerOutput": { + "description": "10x Genomics Cell Ranger software output several different counts results and formats, some with different processing applied. This label distinguishes between these types and is particularly helpful when multiple files are uploaded with the sample name, e.g., barcodes.tsv.gz", + "enum": [ + "Not Applicable", + "filtered MEX", + "filtered_feature_bc_matrix", + "filtered_peak_bc_matrix", + "raw MEX", + "raw_feature_bc_matrix", + "raw_peak_bc_matrix" + ], + "title": "cellRangerOutput" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "DataLevel": { + "description": "Level of data processing applied to file. Levels refer to pre-defined standards of processing for the given assay.", + "enum": [ + "1", + "2", + "3", + "4", + "5" + ], + "title": "dataLevel" + }, + "FileFormat": { + "description": "Standard file format name or file extension", + "enum": [ + "bai", + "bam", + "bed", + "bim", + "csv", + "fam", + "h5", + "h5ad", + "mtx", + "rds", + "tgz", + "tsv", + "txt", + "xls", + "xlsx", + "zip" + ], + "title": "fileFormat" + }, + "IndividualID": { + "description": "Unique identifier assigned to each study participant. 
For multi-specimen data files provide all IDs in a comma-separated list.", + "items": { + "type": "string" + }, + "title": "individualID", + "type": "array" + }, + "MetadataType": { + "description": "A label further classifying the content of metadata resource.", + "enum": [ + "single-cell metadata" + ], + "title": "metadataType" + }, + "ProcessedDataType": { + "description": "A label used for file annotations to provide a brief description of the processed data file.", + "items": { + "enum": [ + "barcode counts", + "differential expression results", + "epigenomic peaks", + "gene counts" + ], + "type": "string" + }, + "title": "processedDataType", + "type": "array" + }, + "RObjectClass": { + "description": "Rds files store R objects, one per file. This label details the class of the R object saved to the Rds file or other similar file types.", + "enum": [ + "ROCR prediction.object", + "Seurat object", + "SummarizedExperiment", + "Symphony reference", + "data.frame", + "list", + "matrix", + "sparse matrix", + "vector" + ], + "title": "RObjectClass" + }, + "ResourceType": { + "description": "High-level classification of the file content", + "items": { + "enum": [ + "experimental data", + "metadata" + ], + "type": "string" + }, + "title": "resourceType", + "type": "array" + }, + "SpecimenModality": { + "description": "Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens", + "enum": [ + "multispecimen", + "single specimen", + "unknown" + ], + "title": "specimenModality" + }, + "TargetPanel": { + "description": "A unique or established human-readable name assigned to the panel of targets profiled in the experiment. 
For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.", + "title": "targetPanel", + "type": "string" + }, + "TargetPanelSize": { + "description": "The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).", + "title": "targetPanelSize", + "type": "integer" + }, + "TargetPanelSynID": { + "description": "In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. This attribute links experimental data files to the target panel metadata via the synapse ID of that file.", + "pattern": "^syn[0-9]{8}", + "title": "targetPanelSynID", + "type": "string" + } + }, + "required": [ + "Assay", + "CellRangerOutput", + "Component", + "DataLevel", + "FileFormat", + "ResourceType", + "SpecimenModality" + ], + "title": "SnRNASeqProcessedDataAnnotationTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.SpatialImagingAssayMetadataTemplate.schema.json b/model_json_schema/ark.SpatialImagingAssayMetadataTemplate.schema.json index ad314461..7e7ef620 100644 --- a/model_json_schema/ark.SpatialImagingAssayMetadataTemplate.schema.json +++ b/model_json_schema/ark.SpatialImagingAssayMetadataTemplate.schema.json @@ -1,112 +1,130 @@ { + "$id": "http://example.com/SpatialImagingAssayMetadataTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", - "properties": { - "targetPanelSize": {}, - "assay": { - "enum": [ - "H&E", - "CosMX", - "serial IHC", - "Xenium", - "GenePS SeqFISH", - "imaging mass 
cytometry", - "Visium" - ] - }, - "specimenModality": { - "enum": [ - "multispecimen", - "unknown", - "single specimen" - ] - }, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "targetPanelSynID": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "sampleProcessingBatch": {}, - "targetPanel": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "dataCollectionBatch": {}, - "slideID": {}, - "biospecimenID": {} - }, - "required": [ - "assay", - "specimenModality", - "Component", - "targetPanelSynID", - "targetPanel" - ], "allOf": [ { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "multispecimen" + "Multispecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "slideID": { + "SlideID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "slideID" + "SlideID" ] } }, { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "single specimen" + "Singlespecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "biospecimenID": { + "BiospecimenID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "biospecimenID" + "BiospecimenID" ] } } - ] + ], + "description": "A template outlining metadata to be collected for each slide in a spatial transcriptomic or imaging based dataset.", + "properties": { + "Assay": { + "description": "The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.", + "enum": [ + "CosMX", + "GenePS SeqFISH", + "H&E", + "Visium", + "Xenium", + "imaging mass cytometry", + "serial IHC" + ], + "title": "assay" + }, + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. 
For multi-specimen data files provide all IDs in a comma-separated list.", + "items": { + "type": "string" + }, + "title": "biospecimenID", + "type": "array" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "DataCollectionBatch": { + "description": "A label indicating batching that occurs during data collection, e.g., collecting data across multiple days.", + "title": "dataCollectionBatch", + "type": "string" + }, + "SampleProcessingBatch": { + "description": "A label indicating batching of sample processing or preparation that occurs prior to data collection.", + "title": "sampleProcessingBatch", + "type": "string" + }, + "SlideID": { + "description": "A distinct label or name, unique within an experiment, assigned to an imaging slides.", + "title": "slideID", + "type": "string" + }, + "SpecimenModality": { + "description": "Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens", + "enum": [ + "multispecimen", + "single specimen", + "unknown" + ], + "title": "specimenModality" + }, + "TargetPanel": { + "description": "A unique or established human-readable name assigned to the panel of targets profiled in the experiment. For example, the panel of antibodies and corresponding fluorophores used in a flow cytometry experiment or panel used in a Xenium spatial transcriptomics experiment.", + "title": "targetPanel", + "type": "string" + }, + "TargetPanelSize": { + "description": "The number of gene, transcript, protein, etc., targets profiled in the assay for assays that use a pre-defined set of probes, antibodies, etc., to measure biological components in samples. 
The input value is expected to be a whole integer that matches the number of targets described in the accompanying target panel metadata (i.e., targetPanelSynID).", + "title": "targetPanelSize", + "type": "integer" + }, + "TargetPanelSynID": { + "description": "In most cases an accompanying metadata file should be provided that details information about the targets profiled in the experiment. This attribute links experimental data files to the target panel metadata via the synapse ID of that file.", + "pattern": "^syn[0-9]{8}", + "title": "targetPanelSynID", + "type": "string" + } + }, + "required": [ + "Assay", + "Component", + "SpecimenModality", + "TargetPanel", + "TargetPanelSynID" + ], + "title": "SpatialImagingAssayMetadataTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_json_schema/ark.SpatialImagingFileAnnotationTemplate.schema.json b/model_json_schema/ark.SpatialImagingFileAnnotationTemplate.schema.json index 3046bbe4..462c4808 100644 --- a/model_json_schema/ark.SpatialImagingFileAnnotationTemplate.schema.json +++ b/model_json_schema/ark.SpatialImagingFileAnnotationTemplate.schema.json @@ -1,155 +1,23 @@ { + "$id": "http://example.com/SpatialImagingFileAnnotationTemplate", "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/ark", - "title": "ark", - "type": "object", - "properties": { - "fileFormat": { - "enum": [ - "fcs", - "h5ad", - "h5", - "txt", - "bim", - "geojson", - "tbi", - "vcf", - "tsv", - "mcd", - "erate", - "bai", - "py", - "bed", - "dose", - "czi", - "fastq", - "rds", - "fam", - "xls", - "tgz", - "zip", - "svs", - "parquet", - "xlsx", - "bam", - "pdf", - "rec", - "csv", - "info", - "mtx", - "docx" - ] - }, - "assay": { - "enum": [ - "Visium", - "CosMX", - "GenePS SeqFISH", - "Xenium", - "imaging mass cytometry", - "H&E", - "serial IHC" - ] - }, - "Component": { - "not": { - "type": "null" - }, - "minLength": 1 - }, - "resourceType": { - "type": "array", - "items": { - "enum": [ - 
"metadata", - "experimental data" - ] - }, - "maxItems": 2 - }, - "specimenModality": { - "enum": [ - "multispecimen", - "unknown", - "single specimen" - ] - }, - "RObjectClass": { - "enum": [ - "data.frame", - "sparse matrix", - "Seurat object", - "vector", - "Symphony reference", - "ROCR prediction.object", - "matrix", - "list", - "SummarizedExperiment", - "" - ] - }, - "metadataType": { - "enum": [ - "target panel", - "cell coordinates", - "tissue microarray map", - "single-cell metadata", - "other", - "" - ] - }, - "processedDataType": { - "type": "array", - "items": { - "enum": [ - "differential expression results", - "epigenomic peaks", - "gene counts", - "barcode counts", - "" - ] - }, - "maxItems": 4 - }, - "slideID": {}, - "biospecimenID": {} - }, - "required": [ - "fileFormat", - "assay", - "Component", - "resourceType", - "specimenModality" - ], "allOf": [ { "if": { "properties": { - "fileFormat": { + "FileFormat": { "enum": [ - "rds" + "Rds" ] } - }, - "required": [ - "fileFormat" - ] + } }, "then": { "properties": { "RObjectClass": { - "enum": [ - "data.frame", - "sparse matrix", - "Seurat object", - "vector", - "Symphony reference", - "ROCR prediction.object", - "matrix", - "list", - "SummarizedExperiment", - "" - ] + "not": { + "type": "null" + } } }, "required": [ @@ -160,120 +28,236 @@ { "if": { "properties": { - "resourceType": { + "ResourceType": { "enum": [ - "metadata" + "Experimentaldata" ] } - }, - "required": [ - "resourceType" - ] + } }, "then": { "properties": { - "metadataType": { - "enum": [ - "target panel", - "cell coordinates", - "tissue microarray map", - "single-cell metadata", - "other" - ] + "ProcessedDataType": { + "not": { + "type": "null" + } } }, "required": [ - "metadataType" + "ProcessedDataType" ] } }, { "if": { "properties": { - "resourceType": { + "ResourceType": { "enum": [ - "experimental data" + "Metadata" ] } - }, - "required": [ - "resourceType" - ] + } }, "then": { "properties": { - "processedDataType": { - "type": 
"array", - "items": { - "enum": [ - "differential expression results", - "epigenomic peaks", - "gene counts", - "barcode counts", - "" - ] - }, - "maxItems": 4 + "MetadataType": { + "not": { + "type": "null" + } } }, "required": [ - "processedDataType" + "MetadataType" ] } }, { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "multispecimen" + "Multispecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "slideID": { + "SlideID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "slideID" + "SlideID" ] } }, { "if": { "properties": { - "specimenModality": { + "SpecimenModality": { "enum": [ - "single specimen" + "Singlespecimen" ] } - }, - "required": [ - "specimenModality" - ] + } }, "then": { "properties": { - "biospecimenID": { + "BiospecimenID": { "not": { "type": "null" - }, - "minLength": 1 + } } }, "required": [ - "biospecimenID" + "BiospecimenID" ] } } - ] + ], + "description": "A data contributor template outlining metadata to be collected as file annotations for imaging-based data files.", + "properties": { + "Assay": { + "description": "The technology used to generate the data in this file. For multimodal datasets with concomitant profiling of biospecimen select all assays that apply. e.g., the GEX files from a CITE-seq experiment should be labeled with both 'scRNASeq' and 'CITESeq'.", + "enum": [ + "CosMX", + "GenePS SeqFISH", + "H&E", + "Visium", + "Xenium", + "imaging mass cytometry", + "serial IHC" + ], + "title": "assay" + }, + "BiospecimenID": { + "description": "A unique identifier assigned to specimens collected from study participants. 
For multi-specimen data files provide all IDs in a comma-separated list.", + "items": { + "type": "string" + }, + "title": "biospecimenID", + "type": "array" + }, + "Component": { + "description": "A high-level attribute for grouping attributes into templates.", + "title": "Component", + "type": "string" + }, + "FileFormat": { + "description": "Standard file format name or file extension", + "enum": [ + "bai", + "bam", + "bed", + "bim", + "csv", + "czi", + "docx", + "dose", + "erate", + "fam", + "fastq", + "fcs", + "geojson", + "h5", + "h5ad", + "info", + "mcd", + "mtx", + "parquet", + "pdf", + "py", + "rds", + "rec", + "svs", + "tbi", + "tgz", + "tsv", + "txt", + "vcf", + "xls", + "xlsx", + "zip" + ], + "title": "fileFormat" + }, + "MetadataType": { + "description": "A label further classifying the content of metadata resource.", + "enum": [ + "cell coordinates", + "other", + "single-cell metadata", + "target panel", + "tissue microarray map" + ], + "title": "metadataType" + }, + "ProcessedDataType": { + "description": "A label used for file annotations to provide a brief description of the processed data file.", + "items": { + "enum": [ + "barcode counts", + "differential expression results", + "epigenomic peaks", + "gene counts" + ], + "type": "string" + }, + "title": "processedDataType", + "type": "array" + }, + "RObjectClass": { + "description": "Rds files store R objects, one per file. 
This label details the class of the R object saved to the Rds file or other similar file types.", + "enum": [ + "ROCR prediction.object", + "Seurat object", + "SummarizedExperiment", + "Symphony reference", + "data.frame", + "list", + "matrix", + "sparse matrix", + "vector" + ], + "title": "RObjectClass" + }, + "ResourceType": { + "description": "High-level classification of the file content", + "items": { + "enum": [ + "experimental data", + "metadata" + ], + "type": "string" + }, + "title": "resourceType", + "type": "array" + }, + "SlideID": { + "description": "A distinct label or name, unique within an experiment, assigned to an imaging slides.", + "title": "slideID", + "type": "string" + }, + "SpecimenModality": { + "description": "Label assigned to experimental data files indicating whether the data contained corresponds to a single or multiple biospecimens", + "enum": [ + "multispecimen", + "single specimen", + "unknown" + ], + "title": "specimenModality" + } + }, + "required": [ + "Assay", + "Component", + "FileFormat", + "ResourceType", + "SpecimenModality" + ], + "title": "SpatialImagingFileAnnotationTemplate", + "type": "object" } \ No newline at end of file diff --git a/model_templates/ark.BDMFCSFileAnnotations.csv b/model_templates/ark.BDMFCSFileAnnotations.csv index 6969d8f6..0b320324 100644 --- a/model_templates/ark.BDMFCSFileAnnotations.csv +++ b/model_templates/ark.BDMFCSFileAnnotations.csv @@ -1 +1 @@ -Component,primaryCellSource,parentBiospecimenID,eventCount,diagnosis,sampleProcessingBatch,biospecimenSubtype,cellType,project,assay,species,biospecimenType,platform,specimenModality,dataSubtype,dataCollectionBatch,resourceType,fileFormat,dataType,userDefinedCellType,program,individualID,biospecimenID,visitID 
+Assay,BiospecimenID,BiospecimenSubtype,BiospecimenType,CellType,Component,DataCollectionBatch,DataSubtype,DataType,Diagnosis,EventCount,FileFormat,IndividualID,ParentBiospecimenID,Platform,PrimaryCellSource,Program,Project,ResourceType,SampleProcessingBatch,Species,SpecimenModality,UserDefinedCellType,VisitID diff --git a/model_templates/ark.BDMFastqFileAnnotations.csv b/model_templates/ark.BDMFastqFileAnnotations.csv index 14c9c417..adc6f021 100644 --- a/model_templates/ark.BDMFastqFileAnnotations.csv +++ b/model_templates/ark.BDMFastqFileAnnotations.csv @@ -1 +1 @@ -dataType,dataSubtype,diagnosis,nucleicAcidSource,readLength,species,resourceType,parentBiospecimenID,cellType,primaryCellSource,libraryPrepMethod,userDefinedCellType,biospecimenType,program,fileFormat,specimenModality,Component,platform,assay,project,biospecimenSubtype,visitID,libraryID,individualID,biospecimenID,targetPanelSize,targetPanelSynID,targetPanel +Assay,BiospecimenID,BiospecimenSubtype,BiospecimenType,CellType,Component,DataSubtype,DataType,Diagnosis,FileFormat,IndividualID,LibraryID,LibraryPrepMethod,NucleicAcidSource,ParentBiospecimenID,Platform,PrimaryCellSource,Program,Project,ReadLength,ResourceType,Species,SpecimenModality,TargetPanel,TargetPanelSize,TargetPanelSynID,UserDefinedCellType,VisitID diff --git a/model_templates/ark.BDMMetadataFileAnnotations.csv b/model_templates/ark.BDMMetadataFileAnnotations.csv index 579cc0e6..8b56bf86 100644 --- a/model_templates/ark.BDMMetadataFileAnnotations.csv +++ b/model_templates/ark.BDMMetadataFileAnnotations.csv @@ -1 +1 @@ -program,dataType,species,project,fileFormat,primaryCellSource,userDefinedCellType,resourceType,cellType,Component,programPhase,metadataType,metadataStandards,assay +Assay,CellType,Component,DataType,FileFormat,MetadataStandards,MetadataType,PrimaryCellSource,Program,ProgramPhase,Project,ResourceType,Species,UserDefinedCellType diff --git a/model_templates/ark.BDMOlinkFileAnnotations.csv 
b/model_templates/ark.BDMOlinkFileAnnotations.csv index 0f13dde5..8863aeda 100644 --- a/model_templates/ark.BDMOlinkFileAnnotations.csv +++ b/model_templates/ark.BDMOlinkFileAnnotations.csv @@ -1 +1 @@ -cellType,dataSubtype,dataType,plateID,visitID,platform,diagnosis,targetPanel,species,targetPanelSize,resourceType,targetPanelSynID,fileFormat,specimenModality,biospecimenType,program,project,userDefinedCellType,primaryCellSource,Component,programPhase +BiospecimenType,CellType,Component,DataSubtype,DataType,Diagnosis,FileFormat,PlateID,Platform,PrimaryCellSource,Program,ProgramPhase,Project,ResourceType,Species,SpecimenModality,TargetPanel,TargetPanelSize,TargetPanelSynID,UserDefinedCellType,VisitID diff --git a/model_templates/ark.BDMSpatialImagingFileAnnotations.csv b/model_templates/ark.BDMSpatialImagingFileAnnotations.csv index 7d5b65f9..2e663a7e 100644 --- a/model_templates/ark.BDMSpatialImagingFileAnnotations.csv +++ b/model_templates/ark.BDMSpatialImagingFileAnnotations.csv @@ -1 +1 @@ -parentBiospecimenID,Component,dataType,biospecimenSubtype,resourceType,program,dataSubtype,specimenModality,targetPanel,project,userDefinedCellType,targetPanelSynID,biospecimenType,primaryCellSource,fileFormat,cellType,visitID,targetPanelSize,assay,diagnosis,species,processedDataType,metadataType,slideID,biospecimenID,RObjectClass +Assay,BiospecimenID,BiospecimenSubtype,BiospecimenType,CellType,Component,DataSubtype,DataType,Diagnosis,FileFormat,MetadataType,ParentBiospecimenID,PrimaryCellSource,ProcessedDataType,Program,Project,RObjectClass,ResourceType,SlideID,Species,SpecimenModality,TargetPanel,TargetPanelSize,TargetPanelSynID,UserDefinedCellType,VisitID diff --git a/model_templates/ark.BDMscRNASeqProcessedDataAnnotations.csv b/model_templates/ark.BDMscRNASeqProcessedDataAnnotations.csv index 1909341a..df74f459 100644 --- a/model_templates/ark.BDMscRNASeqProcessedDataAnnotations.csv +++ b/model_templates/ark.BDMscRNASeqProcessedDataAnnotations.csv @@ -1 +1 @@ 
-assay,fileFormat,project,biospecimenType,diagnosis,primaryCellSource,dataLevel,visitID,cellRangerOutput,libraryPrepMethod,nucleicAcidSource,program,userDefinedCellType,biospecimenSubtype,species,cellType,specimenModality,softwareAndVersion,resourceType,dataType,dataSubtype,Component,parentBiospecimenID,alignmentReference,targetPanelSize,targetPanelSynID,targetPanel,RObjectClass,individualID,biospecimenID,metadataType,processedDataType +AlignmentReference,Assay,BiospecimenID,BiospecimenSubtype,BiospecimenType,CellRangerOutput,CellType,Component,DataLevel,DataSubtype,DataType,Diagnosis,FileFormat,IndividualID,LibraryPrepMethod,MetadataType,NucleicAcidSource,ParentBiospecimenID,PrimaryCellSource,ProcessedDataType,Program,Project,RObjectClass,ResourceType,SoftwareAndVersion,Species,SpecimenModality,TargetPanel,TargetPanelSize,TargetPanelSynID,UserDefinedCellType,VisitID diff --git a/model_templates/ark.BDMscVDJSeqProcessedDataAnnotations.csv b/model_templates/ark.BDMscVDJSeqProcessedDataAnnotations.csv index 11f71f3a..80e759fc 100644 --- a/model_templates/ark.BDMscVDJSeqProcessedDataAnnotations.csv +++ b/model_templates/ark.BDMscVDJSeqProcessedDataAnnotations.csv @@ -1 +1 @@ -cellType,softwareAndVersion,dataLevel,species,fileFormat,program,visitID,primaryCellSource,specimenModality,dataType,alignmentReference,biospecimenSubtype,parentBiospecimenID,diagnosis,Component,libraryPrepMethod,resourceType,nucleicAcidSource,userDefinedCellType,dataSubtype,project,biospecimenType,assay,RObjectClass,individualID,biospecimenID,metadataType,processedDataType,targetPanel,targetPanelSize,targetPanelSynID 
+AlignmentReference,Assay,BiospecimenID,BiospecimenSubtype,BiospecimenType,CellType,Component,DataLevel,DataSubtype,DataType,Diagnosis,FileFormat,IndividualID,LibraryPrepMethod,MetadataType,NucleicAcidSource,ParentBiospecimenID,PrimaryCellSource,ProcessedDataType,Program,Project,RObjectClass,ResourceType,SoftwareAndVersion,Species,SpecimenModality,TargetPanel,TargetPanelSize,TargetPanelSynID,UserDefinedCellType,VisitID diff --git a/model_templates/ark.BDMsnATACSeqProcessedDataAnnotations.csv b/model_templates/ark.BDMsnATACSeqProcessedDataAnnotations.csv index 7fc8797f..df74f459 100644 --- a/model_templates/ark.BDMsnATACSeqProcessedDataAnnotations.csv +++ b/model_templates/ark.BDMsnATACSeqProcessedDataAnnotations.csv @@ -1 +1 @@ -biospecimenType,dataLevel,project,dataType,assay,biospecimenSubtype,libraryPrepMethod,resourceType,visitID,diagnosis,parentBiospecimenID,cellRangerOutput,softwareAndVersion,primaryCellSource,userDefinedCellType,specimenModality,cellType,nucleicAcidSource,dataSubtype,alignmentReference,Component,program,species,fileFormat,targetPanelSynID,targetPanelSize,targetPanel,processedDataType,metadataType,biospecimenID,individualID,RObjectClass +AlignmentReference,Assay,BiospecimenID,BiospecimenSubtype,BiospecimenType,CellRangerOutput,CellType,Component,DataLevel,DataSubtype,DataType,Diagnosis,FileFormat,IndividualID,LibraryPrepMethod,MetadataType,NucleicAcidSource,ParentBiospecimenID,PrimaryCellSource,ProcessedDataType,Program,Project,RObjectClass,ResourceType,SoftwareAndVersion,Species,SpecimenModality,TargetPanel,TargetPanelSize,TargetPanelSynID,UserDefinedCellType,VisitID diff --git a/model_templates/ark.BDMsnRNASeqProcessedDataAnnotations.csv b/model_templates/ark.BDMsnRNASeqProcessedDataAnnotations.csv index d0da10da..df74f459 100644 --- a/model_templates/ark.BDMsnRNASeqProcessedDataAnnotations.csv +++ b/model_templates/ark.BDMsnRNASeqProcessedDataAnnotations.csv @@ -1 +1 @@ 
-biospecimenType,nucleicAcidSource,libraryPrepMethod,dataSubtype,dataLevel,cellType,cellRangerOutput,biospecimenSubtype,project,visitID,program,softwareAndVersion,specimenModality,primaryCellSource,Component,dataType,resourceType,species,diagnosis,assay,fileFormat,alignmentReference,userDefinedCellType,parentBiospecimenID,individualID,biospecimenID,metadataType,processedDataType,targetPanelSynID,targetPanelSize,targetPanel,RObjectClass +AlignmentReference,Assay,BiospecimenID,BiospecimenSubtype,BiospecimenType,CellRangerOutput,CellType,Component,DataLevel,DataSubtype,DataType,Diagnosis,FileFormat,IndividualID,LibraryPrepMethod,MetadataType,NucleicAcidSource,ParentBiospecimenID,PrimaryCellSource,ProcessedDataType,Program,Project,RObjectClass,ResourceType,SoftwareAndVersion,Species,SpecimenModality,TargetPanel,TargetPanelSize,TargetPanelSynID,UserDefinedCellType,VisitID diff --git a/model_templates/ark.BiospecimenMetadataTemplate.csv b/model_templates/ark.BiospecimenMetadataTemplate.csv index 008f3ec0..fdcdae24 100644 --- a/model_templates/ark.BiospecimenMetadataTemplate.csv +++ b/model_templates/ark.BiospecimenMetadataTemplate.csv @@ -1 +1 @@ -notes,program,project,biospecimenType,Component,biospecimenID,individualID,altSampleID,parentBiospecimenID,sampleCollectionBatch,biospecimenSubtype,visitID,skinSiteStatus,anatomicalSite,salivaCollectionProcedure,primaryCellSource,cellType,cellOntologyID,krennLining,krennInflammatory,synovialCollectionProcedure,krennStroma,krennSynovitisScore,userDefinedCellType,FACSPopulation +AltSampleID,AnatomicalSite,BiospecimenID,BiospecimenSubtype,BiospecimenType,CellOntologyID,CellType,Component,FACSPopulation,IndividualID,KrennInflammatory,KrennLining,KrennStroma,KrennSynovitisScore,Notes,ParentBiospecimenID,PrimaryCellSource,Program,Project,SalivaCollectionProcedure,SampleCollectionBatch,SkinSiteStatus,SynovialCollectionProcedure,UserDefinedCellType,VisitID diff --git a/model_templates/ark.BulkATAC-seqAssayMetadataTemplate.csv 
b/model_templates/ark.BulkATAC-seqAssayMetadataTemplate.csv index a1059f48..bd070a6f 100644 --- a/model_templates/ark.BulkATAC-seqAssayMetadataTemplate.csv +++ b/model_templates/ark.BulkATAC-seqAssayMetadataTemplate.csv @@ -1 +1 @@ -totalReads,specimenModality,softwareAndVersion,libraryPrepMethod,dataCollectionBatch,nucleicAcidSource,Component,sampleProcessingBatch,assay,alignmentReference,platform,biospecimenID,libraryID,10xProbeSetReference +10xProbeSetReference,AlignmentReference,Assay,BiospecimenID,Component,DataCollectionBatch,LibraryID,LibraryPrepMethod,NucleicAcidSource,Platform,SampleProcessingBatch,SoftwareAndVersion,SpecimenModality,TotalReads diff --git a/model_templates/ark.BulkRNA-seqAssayMetadataTemplate.csv b/model_templates/ark.BulkRNA-seqAssayMetadataTemplate.csv index ae2aa148..bd070a6f 100644 --- a/model_templates/ark.BulkRNA-seqAssayMetadataTemplate.csv +++ b/model_templates/ark.BulkRNA-seqAssayMetadataTemplate.csv @@ -1 +1 @@ -dataCollectionBatch,sampleProcessingBatch,specimenModality,Component,libraryPrepMethod,platform,softwareAndVersion,totalReads,alignmentReference,nucleicAcidSource,assay,biospecimenID,libraryID,10xProbeSetReference +10xProbeSetReference,AlignmentReference,Assay,BiospecimenID,Component,DataCollectionBatch,LibraryID,LibraryPrepMethod,NucleicAcidSource,Platform,SampleProcessingBatch,SoftwareAndVersion,SpecimenModality,TotalReads diff --git a/model_templates/ark.ClinicalMetadataTemplate.csv b/model_templates/ark.ClinicalMetadataTemplate.csv index f9c54595..e82d5d3e 100644 --- a/model_templates/ark.ClinicalMetadataTemplate.csv +++ b/model_templates/ark.ClinicalMetadataTemplate.csv @@ -1 +1 @@ -heightUnits,comorbidities,age,ethnicity,ageUnits,weight,program,Component,species,individualID,sex,race,weightUnits,project,height,diagnosis,PASI,diabetesType,visitID,CDASI,VASI,VETI,vitiligoPattern,VIDA 
+Age,AgeUnits,CDASI,Comorbidities,Component,DiabetesType,Diagnosis,Ethnicity,Height,HeightUnits,IndividualID,PASI,Program,Project,Race,Sex,Species,VASI,VETI,VIDA,VisitID,VitiligoPattern,Weight,WeightUnits diff --git a/model_templates/ark.CyTOFAssayMetadataTemplate.csv b/model_templates/ark.CyTOFAssayMetadataTemplate.csv index 9644aa59..44752f49 100644 --- a/model_templates/ark.CyTOFAssayMetadataTemplate.csv +++ b/model_templates/ark.CyTOFAssayMetadataTemplate.csv @@ -1 +1 @@ -softwareAndVersion,dataCollectionBatch,Component,targetPanelSynID,biospecimenID,sampleProcessingBatch,platform,targetPanelSize,assay,targetPanel +Assay,BiospecimenID,Component,DataCollectionBatch,Platform,SampleProcessingBatch,SoftwareAndVersion,TargetPanel,TargetPanelSize,TargetPanelSynID diff --git a/model_templates/ark.DatasetAnnotationTemplate.csv b/model_templates/ark.DatasetAnnotationTemplate.csv index fe7f3864..249e6215 100644 --- a/model_templates/ark.DatasetAnnotationTemplate.csv +++ b/model_templates/ark.DatasetAnnotationTemplate.csv @@ -1 +1 @@ -acknowledgmentStatement,diagnosis,species,associatedCodeURL,associatedDataset,dataSubtype,project,dataType,assay,ImmPortAccession,biospecimenType,publicationSynID,ARKRelease,datasetStatus,datasetDescription,associatedAccession,Component,datasetType,program,biospecimenSubtype,programPhase +ARKRelease,AcknowledgmentStatement,Assay,AssociatedAccession,AssociatedCodeURL,AssociatedDataset,BiospecimenSubtype,BiospecimenType,Component,DataSubtype,DataType,DatasetDescription,DatasetStatus,DatasetType,Diagnosis,ImmPortAccession,Program,ProgramPhase,Project,PublicationSynID,Species diff --git a/model_templates/ark.FCSFileAnnotationTemplate.csv b/model_templates/ark.FCSFileAnnotationTemplate.csv index 80afbcda..17d13c98 100644 --- a/model_templates/ark.FCSFileAnnotationTemplate.csv +++ b/model_templates/ark.FCSFileAnnotationTemplate.csv @@ -1 +1 @@ 
-fileFormat,eventCount,specimenModality,Component,assay,dataCollectionBatch,sampleProcessingBatch,individualID,biospecimenID +Assay,BiospecimenID,Component,DataCollectionBatch,EventCount,FileFormat,IndividualID,SampleProcessingBatch,SpecimenModality diff --git a/model_templates/ark.FastqFileAnnotationTemplate.csv b/model_templates/ark.FastqFileAnnotationTemplate.csv index 8e828c42..9623c883 100644 --- a/model_templates/ark.FastqFileAnnotationTemplate.csv +++ b/model_templates/ark.FastqFileAnnotationTemplate.csv @@ -1 +1 @@ -Component,fileFormat,specimenModality,readLength,assay,libraryID,biospecimenID,individualID,targetPanel,targetPanelSynID,targetPanelSize +Assay,BiospecimenID,Component,FileFormat,IndividualID,LibraryID,ReadLength,SpecimenModality,TargetPanel,TargetPanelSize,TargetPanelSynID diff --git a/model_templates/ark.InVitroBiospecimenMetadataTemplate.csv b/model_templates/ark.InVitroBiospecimenMetadataTemplate.csv index d56c4e8f..21d94f93 100644 --- a/model_templates/ark.InVitroBiospecimenMetadataTemplate.csv +++ b/model_templates/ark.InVitroBiospecimenMetadataTemplate.csv @@ -1 +1 @@ -program,biospecimenID,notes,altSampleID,biospecimenType,individualID,parentBiospecimenID,biospecimenSubtype,Component,project,treatmentTimepoint,sampleCollectionBatch,treatment,visitID,primaryCellSource,cellType,cellOntologyID,anatomicalSite,skinSiteStatus,synovialCollectionProcedure,salivaCollectionProcedure,krennLining,krennInflammatory,krennSynovitisScore,krennStroma,FACSPopulation,userDefinedCellType +AltSampleID,AnatomicalSite,BiospecimenID,BiospecimenSubtype,BiospecimenType,CellOntologyID,CellType,Component,FACSPopulation,IndividualID,KrennInflammatory,KrennLining,KrennStroma,KrennSynovitisScore,Notes,ParentBiospecimenID,PrimaryCellSource,Program,Project,SalivaCollectionProcedure,SampleCollectionBatch,SkinSiteStatus,SynovialCollectionProcedure,Treatment,TreatmentTimepoint,UserDefinedCellType,VisitID diff --git a/model_templates/ark.OlinkAssayMetadataTemplate.csv 
b/model_templates/ark.OlinkAssayMetadataTemplate.csv index 2731a7f9..5872704c 100644 --- a/model_templates/ark.OlinkAssayMetadataTemplate.csv +++ b/model_templates/ark.OlinkAssayMetadataTemplate.csv @@ -1 +1 @@ -targetPanelSynID,targetPanel,Component,targetPanelSize,platform,plateID,assay +Assay,Component,PlateID,Platform,TargetPanel,TargetPanelSize,TargetPanelSynID diff --git a/model_templates/ark.OlinkFileAnnotationTemplate.csv b/model_templates/ark.OlinkFileAnnotationTemplate.csv index 447c2338..ff8a582c 100644 --- a/model_templates/ark.OlinkFileAnnotationTemplate.csv +++ b/model_templates/ark.OlinkFileAnnotationTemplate.csv @@ -1 +1 @@ -fileFormat,specimenModality,plateID,resourceType,Component +Component,FileFormat,PlateID,ResourceType,SpecimenModality diff --git a/model_templates/ark.PublicationMetadataTemplate.csv b/model_templates/ark.PublicationMetadataTemplate.csv index e13c167a..9d580840 100644 --- a/model_templates/ark.PublicationMetadataTemplate.csv +++ b/model_templates/ark.PublicationMetadataTemplate.csv @@ -1 +1 @@ -PMID,associatedDataset,Component,publicationDate,publicationType,title,DOI,PMCID,year,program,journal,project,programPhase +AssociatedDataset,Component,DOI,Journal,PMCID,PMID,Program,ProgramPhase,Project,PublicationDate,PublicationType,Title,Year diff --git a/model_templates/ark.ScRNASeqAssayMetadataTemplate.csv b/model_templates/ark.ScRNASeqAssayMetadataTemplate.csv index 5d8417ed..7c5d5feb 100644 --- a/model_templates/ark.ScRNASeqAssayMetadataTemplate.csv +++ b/model_templates/ark.ScRNASeqAssayMetadataTemplate.csv @@ -1 +1 @@ -alignmentReference,sampleProcessingBatch,dataCollectionBatch,assay,inputCellCount,libraryPrepMethod,totalReads,sequencingSaturation,softwareAndVersion,specimenModality,Component,platform,nucleicAcidSource,percentCellViability,10xProbeSetReference,biospecimenID,libraryID 
+10xProbeSetReference,AlignmentReference,Assay,BiospecimenID,Component,DataCollectionBatch,InputCellCount,LibraryID,LibraryPrepMethod,NucleicAcidSource,PercentCellViability,Platform,SampleProcessingBatch,SequencingSaturation,SoftwareAndVersion,SpecimenModality,TotalReads diff --git a/model_templates/ark.ScRNASeqProcessedDataAnnotationTemplate.csv b/model_templates/ark.ScRNASeqProcessedDataAnnotationTemplate.csv index e017fab1..34b3e4ba 100644 --- a/model_templates/ark.ScRNASeqProcessedDataAnnotationTemplate.csv +++ b/model_templates/ark.ScRNASeqProcessedDataAnnotationTemplate.csv @@ -1 +1 @@ -resourceType,assay,fileFormat,dataLevel,Component,cellRangerOutput,specimenModality,metadataType,processedDataType,targetPanelSize,targetPanelSynID,targetPanel,RObjectClass,individualID,biospecimenID +Assay,BiospecimenID,CellRangerOutput,Component,DataLevel,FileFormat,IndividualID,MetadataType,ProcessedDataType,RObjectClass,ResourceType,SpecimenModality,TargetPanel,TargetPanelSize,TargetPanelSynID diff --git a/model_templates/ark.ScVDJSeqProcessedDataAnnotationTemplate.csv b/model_templates/ark.ScVDJSeqProcessedDataAnnotationTemplate.csv index 76dc5a9d..5c272687 100644 --- a/model_templates/ark.ScVDJSeqProcessedDataAnnotationTemplate.csv +++ b/model_templates/ark.ScVDJSeqProcessedDataAnnotationTemplate.csv @@ -1 +1 @@ -dataLevel,Component,specimenModality,fileFormat,assay,resourceType,biospecimenID,individualID,RObjectClass,targetPanelSize,targetPanelSynID,targetPanel,metadataType,processedDataType +Assay,BiospecimenID,Component,DataLevel,FileFormat,IndividualID,MetadataType,ProcessedDataType,RObjectClass,ResourceType,SpecimenModality,TargetPanel,TargetPanelSize,TargetPanelSynID diff --git a/model_templates/ark.SnATAC-seqAssayMetadataTemplate.csv b/model_templates/ark.SnATAC-seqAssayMetadataTemplate.csv index ff75dde3..7c5d5feb 100644 --- a/model_templates/ark.SnATAC-seqAssayMetadataTemplate.csv +++ b/model_templates/ark.SnATAC-seqAssayMetadataTemplate.csv @@ -1 +1 @@ 
-assay,nucleicAcidSource,specimenModality,dataCollectionBatch,softwareAndVersion,alignmentReference,totalReads,percentCellViability,platform,Component,sequencingSaturation,sampleProcessingBatch,inputCellCount,libraryPrepMethod,libraryID,biospecimenID,10xProbeSetReference +10xProbeSetReference,AlignmentReference,Assay,BiospecimenID,Component,DataCollectionBatch,InputCellCount,LibraryID,LibraryPrepMethod,NucleicAcidSource,PercentCellViability,Platform,SampleProcessingBatch,SequencingSaturation,SoftwareAndVersion,SpecimenModality,TotalReads diff --git a/model_templates/ark.SnATACSeqProcessedDataAnnotationTemplate.csv b/model_templates/ark.SnATACSeqProcessedDataAnnotationTemplate.csv index 157f4c03..34b3e4ba 100644 --- a/model_templates/ark.SnATACSeqProcessedDataAnnotationTemplate.csv +++ b/model_templates/ark.SnATACSeqProcessedDataAnnotationTemplate.csv @@ -1 +1 @@ -assay,Component,resourceType,specimenModality,fileFormat,dataLevel,cellRangerOutput,targetPanelSize,targetPanelSynID,targetPanel,metadataType,processedDataType,individualID,biospecimenID,RObjectClass +Assay,BiospecimenID,CellRangerOutput,Component,DataLevel,FileFormat,IndividualID,MetadataType,ProcessedDataType,RObjectClass,ResourceType,SpecimenModality,TargetPanel,TargetPanelSize,TargetPanelSynID diff --git a/model_templates/ark.SnRNASeqAssayMetadataTemplate.csv b/model_templates/ark.SnRNASeqAssayMetadataTemplate.csv index e529f24a..7c5d5feb 100644 --- a/model_templates/ark.SnRNASeqAssayMetadataTemplate.csv +++ b/model_templates/ark.SnRNASeqAssayMetadataTemplate.csv @@ -1 +1 @@ -dataCollectionBatch,percentCellViability,softwareAndVersion,alignmentReference,nucleicAcidSource,sampleProcessingBatch,Component,platform,assay,inputCellCount,sequencingSaturation,totalReads,libraryPrepMethod,specimenModality,10xProbeSetReference,libraryID,biospecimenID 
+10xProbeSetReference,AlignmentReference,Assay,BiospecimenID,Component,DataCollectionBatch,InputCellCount,LibraryID,LibraryPrepMethod,NucleicAcidSource,PercentCellViability,Platform,SampleProcessingBatch,SequencingSaturation,SoftwareAndVersion,SpecimenModality,TotalReads diff --git a/model_templates/ark.SnRNASeqProcessedDataAnnotationTemplate.csv b/model_templates/ark.SnRNASeqProcessedDataAnnotationTemplate.csv index fcc7caf1..34b3e4ba 100644 --- a/model_templates/ark.SnRNASeqProcessedDataAnnotationTemplate.csv +++ b/model_templates/ark.SnRNASeqProcessedDataAnnotationTemplate.csv @@ -1 +1 @@ -resourceType,fileFormat,specimenModality,assay,cellRangerOutput,Component,dataLevel,metadataType,processedDataType,RObjectClass,individualID,biospecimenID,targetPanelSize,targetPanel,targetPanelSynID +Assay,BiospecimenID,CellRangerOutput,Component,DataLevel,FileFormat,IndividualID,MetadataType,ProcessedDataType,RObjectClass,ResourceType,SpecimenModality,TargetPanel,TargetPanelSize,TargetPanelSynID diff --git a/model_templates/ark.SpatialImagingAssayMetadataTemplate.csv b/model_templates/ark.SpatialImagingAssayMetadataTemplate.csv index 1b5cc61c..6e886fb9 100644 --- a/model_templates/ark.SpatialImagingAssayMetadataTemplate.csv +++ b/model_templates/ark.SpatialImagingAssayMetadataTemplate.csv @@ -1 +1 @@ -targetPanelSize,assay,specimenModality,Component,targetPanelSynID,sampleProcessingBatch,targetPanel,dataCollectionBatch,slideID,biospecimenID +Assay,BiospecimenID,Component,DataCollectionBatch,SampleProcessingBatch,SlideID,SpecimenModality,TargetPanel,TargetPanelSize,TargetPanelSynID diff --git a/model_templates/ark.SpatialImagingFileAnnotationTemplate.csv b/model_templates/ark.SpatialImagingFileAnnotationTemplate.csv index d347c012..0ae7e4d6 100644 --- a/model_templates/ark.SpatialImagingFileAnnotationTemplate.csv +++ b/model_templates/ark.SpatialImagingFileAnnotationTemplate.csv @@ -1 +1 @@ 
-fileFormat,assay,Component,resourceType,specimenModality,RObjectClass,metadataType,processedDataType,slideID,biospecimenID +Assay,BiospecimenID,Component,FileFormat,MetadataType,ProcessedDataType,RObjectClass,ResourceType,SlideID,SpecimenModality diff --git a/model_templates/jmvera.FastqFileAnnotationTemplate.csv b/model_templates/jmvera.FastqFileAnnotationTemplate.csv deleted file mode 100644 index 9623c883..00000000 --- a/model_templates/jmvera.FastqFileAnnotationTemplate.csv +++ /dev/null @@ -1 +0,0 @@ -Assay,BiospecimenID,Component,FileFormat,IndividualID,LibraryID,ReadLength,SpecimenModality,TargetPanel,TargetPanelSize,TargetPanelSynID diff --git a/utils/context_specific_models.py b/utils/context_specific_models.py index 73824855..f8c0d204 100644 --- a/utils/context_specific_models.py +++ b/utils/context_specific_models.py @@ -5,6 +5,11 @@ import sys import time +''' +this script will take each context.csv and combine with the ark.all_attributes.csv +to generate context-specific model csv files. 
+''' + #### #### Functions #### @@ -61,7 +66,7 @@ def update_all_attributes(allAttr, vv): contexts = [c for c in contexts if c not in [".DS_Store", ".Rhistory"]] # for local execution # shore-up attribute valid values so that all context-specific valid values are included in all_attributes.csv -allAttr = pd.read_csv("ark.all_attributes.csv") +allAttr = pd.read_csv("ark.all_attributes.csv", dtype="object") all_vv = get_valid_values_dict(allAttr) # then compile context-specific valid values @@ -107,7 +112,7 @@ def update_all_attributes(allAttr, vv): update_all_attributes(allAttr, all_vv) # read in newest version and prep all attributes csv -allAttr = pd.read_csv("ark.all_attributes.csv") +allAttr = pd.read_csv("ark.all_attributes.csv", dtype="object") # create dictionary of attribute descriptions that can be pulled into context models descriptions = allAttr.loc[:, ["Attribute", "Description"]].set_index("Attribute").to_dict("index") diff --git a/utils/generate_csv_templates.py b/utils/generate_csv_templates.py index b26a45bd..c4a2b94d 100644 --- a/utils/generate_csv_templates.py +++ b/utils/generate_csv_templates.py @@ -3,6 +3,12 @@ import sys import json +''' +this script generates a "blank" csv file for every json schema in model_json_schema/ +which are used for various downstream purposes including the data dictionary site +and BDM curation work +''' + #### #### Functions #### diff --git a/utils/generate_jsonschema.py b/utils/generate_jsonschema.py new file mode 100644 index 00000000..2a6ea415 --- /dev/null +++ b/utils/generate_jsonschema.py @@ -0,0 +1,30 @@ +from synapseclient import Synapse +from synapseclient.extensions.curator import generate_jsonschema +import pandas as pd + +''' +use synapseclient extension to create Curator json schema from context models +''' + +# create synapse client obj, this will be unnecessary in future client releases +syn = Synapse() + +# read in compiled set of templates for each context +templates = 
pd.read_table("templates_by_context.txt", header=None) +templates.columns = ['template', 'context'] +templates = templates.groupby(['context']).agg({'template': lambda x: list(x)}).reset_index() +templates = templates.set_index('context').to_dict()['template'] + +for context in templates.keys(): + print(f"Generating JSON schemas for context: {context}") + for t in templates[context]: + schemas, file_paths = generate_jsonschema( + data_model_source=f"model_contexts/{context}/ark.{context}_model.csv", + output=f"model_json_schema/ark.{t}.schema.json", + data_types= [t], + synapse_client=syn + ) + +print("JSON schema generation complete!") + +# END