diff --git a/+bids/+internal/add_missing_field.m b/+bids/+internal/add_missing_field.m
new file mode 100644
index 00000000..9e7ae6f1
--- /dev/null
+++ b/+bids/+internal/add_missing_field.m
@@ -0,0 +1,33 @@
+function structure = add_missing_field(structure, field)
+    %
+    % Adds a field, set to '', to a structure (or structure array) that lacks it.
+    %
+    % USAGE::
+    %
+    %   structure = bids.internal.add_missing_field(structure, field)
+    %
+    % :param structure: structure, or structure array, to update
+    % :type  structure: structure
+    % :param field: name of the field to add
+    % :type  field: string
+    %
+    % The input is returned unchanged when the field already exists.
+    % Otherwise the field is set to '' in every element of ``structure``
+    % (an empty structure array gets one element).
+    %
+
+    if isfield(structure, field)
+        return
+    end
+
+    % assigning to the first element creates the field
+    % (and the element itself when the array is empty)
+    structure(1).(field) = '';
+
+    % the assignment above leaves the new field set to [] in the other elements:
+    % set them to '' too so that all the elements are consistent
+    for i = 2:numel(structure)
+        structure(i).(field) = '';
+    end
+
+end
diff --git a/+bids/+internal/append_to_layout.m b/+bids/+internal/append_to_layout.m
new file mode 100644
index 00000000..11c3a76e
--- /dev/null
+++ b/+bids/+internal/append_to_layout.m
@@ -0,0 +1,102 @@
+function subject = append_to_layout(file, subject, modality, schema)
+    %
+    % Appends a file to the BIDS layout by parsing it according to the provided schema
+    %
+    % USAGE::
+    %
+    %   subject = append_to_layout(file, subject, modality, schema == [])
+    %
+    % :param file: file to parse (passed to ``bids.internal.parse_filename``)
+    % :type  file: string
+    % :param subject: subject sub-structure from the BIDS layout
+    % :type  subject: structure
+    % :param modality: name of the field of ``subject`` the file is appended to;
+    %                  also the name of the datatype looked up in ``schema.datatypes``
+    % :type  modality: string
+    % :param schema: BIDS schema; when empty (default) the file is parsed schema-less
+    % :type  schema: structure
+    %
+    % When a schema is used, ``subject`` is returned unchanged for the files whose
+    % suffix is not listed in the schema for this modality and for the files that
+    % ``bids.internal.parse_filename`` reports as not matching the template.
+    %
+    %
+    % Copyright (C) 2021--, BIDS-MATLAB developers
+
+    if ~exist('schema', 'var')
+        schema = [];
+    end
+
+    % Parse file first to identify the suffix group in the template.
+    % Then reparse the file using the entity-label pairs defined in the schema.
+    p = bids.internal.parse_filename(file);
+
+    if ~isempty(schema)
+
+        idx = find_suffix_group(modality, p.suffix, schema);
+
+        if isempty(idx)
+            warning('append_to_structure:noMatchingSuffix', ...
+                    'Skipping file with no valid suffix in schema: %s', file);
+            return
+        end
+
+        entities = bids.schema.return_modality_entities(schema.datatypes.(modality)(idx), schema);
+        p = bids.internal.parse_filename(file, entities);
+
+        % parse_filename warns and returns an empty structure when the file does
+        % not match the template: skip it rather than append a blank entry
+        if isempty(p)
+            return
+        end
+
+    end
+
+    if isempty(subject.(modality))
+        subject.(modality) = p;
+        return
+    end
+
+    % Add any field that is missing from the layout or from the output of the
+    % parsing to make sure the 2 structures can be concatenated
+    [subject.(modality), p] = bids.internal.match_structure_fields(subject.(modality), p);
+    subject.(modality)(end + 1, 1) = p;
+
+end
+
+function idx = find_suffix_group(modality, suffix, schema)
+    %
+    % Returns the index of the first suffix group of ``schema.datatypes.(modality)``
+    % that lists ``suffix``. Returns [] when the schema is empty or, after a
+    % warning, when no suffix group matches.
+
+    idx = [];
+
+    if isempty(schema)
+        return
+    end
+
+    suffix_groups = schema.datatypes.(modality);
+
+    for i = 1:numel(suffix_groups)
+
+        this_suffix_group = suffix_groups(i);
+
+        % for CI: the suffix group may come wrapped in a cell
+        if iscell(this_suffix_group)
+            this_suffix_group = this_suffix_group{1};
+        end
+
+        if any(strcmp(suffix, this_suffix_group.suffixes))
+            idx = i;
+            break
+        end
+
+    end
+
+    if isempty(idx)
+        warning('findSuffix:noMatchingSuffix', ...
+                'No corresponding suffix in schema for %s for datatype %s', suffix, modality);
+    end
+
+end
diff --git a/+bids/+internal/file_utils.m b/+bids/+internal/file_utils.m index 70b8083b..c565803f 100644 --- a/+bids/+internal/file_utils.m +++ b/+bids/+internal/file_utils.m @@ -282,7 +282,6 @@ files = dirs; else - t = regexp(files, expr); if numel(files) == 1 && ~iscell(t) diff --git a/+bids/+internal/get_metadata.m b/+bids/+internal/get_metadata.m index 612658f1..106ddf39 100644 --- a/+bids/+internal/get_metadata.m +++ b/+bids/+internal/get_metadata.m @@ -21,7 +21,7 @@ N = 3; % -There is a session level in the hierarchy - if isfield(p, 'ses') && ~isempty(p.ses) + if isfield(p.entities, 'ses') && ~isempty(p.entities.ses) N = N + 1; end @@ -31,7 +31,7 @@ % -List the potential metadata files associated with this file suffix type % Default is to assume it is a JSON file - metafile = bids.internal.file_utils('FPList', pth, sprintf(pattern, p.type)); + metafile = bids.internal.file_utils('FPList', pth, sprintf(pattern, p.suffix)); if isempty(metafile) metafile = {}; @@ -44,13 +44,17 @@ for i = 1:numel(metafile) p2 = bids.internal.parse_filename(metafile{i}); - fn = setdiff(fieldnames(p2), {'filename', 'ext', 'type'}); + entities = {}; + if isfield(p2, 'entities') + entities = fieldnames(p2.entities); + end % -Check if this metadata file contains the same entity-label pairs as its % data file counterpart ismeta = true; - for j = 1:numel(fn) - if ~isfield(p, fn{j}) || ~strcmp(p.(fn{j}), p2.(fn{j})) + for j = 1:numel(entities) + if ~isfield(p.entities, entities{j}) || ...
+ ~strcmp(p.entities.(entities{j}), p2.entities.(entities{j})) ismeta = false; break end @@ -73,9 +77,15 @@ end - % ========================================================================== - % -Inheritance principle - % ========================================================================== + if isempty(meta) + warning('No metadata for %s', filename); + end + +end + +% ========================================================================== +% -Inheritance principle +% ========================================================================== function s1 = update_metadata(s1, s2, file) if isempty(s2) return @@ -88,3 +98,4 @@ s1.(fn{i}) = s2.(fn{i}); end end +end
diff --git a/+bids/+internal/keep_file_for_query.m b/+bids/+internal/keep_file_for_query.m
new file mode 100644
index 00000000..391116ef
--- /dev/null
+++ b/+bids/+internal/keep_file_for_query.m
@@ -0,0 +1,50 @@
+function status = keep_file_for_query(file_struct, options)
+    %
+    % Returns true if a parsed file meets all the conditions of a query
+    %
+    % USAGE::
+    %
+    %   status = bids.internal.keep_file_for_query(file_struct, options)
+    %
+    % :param file_struct: parsed file, with a ``suffix`` field and an ``entities``
+    %                     field (``entities`` may be absent if the file has no entity)
+    % :type  file_struct: structure
+    % :param options: n x 2 cell. First column: ``'suffix'`` or the name of an entity.
+    %                 Second column: the values accepted for it.
+    % :type  options: cell
+    %
+    % ``status`` is false as soon as one condition is not met, that is if the suffix
+    % is not one of the accepted ones, if an entity is absent from the file or
+    % if its label is not one of the accepted ones.
+    %
+
+    status = false;
+
+    if isfield(file_struct, 'entities')
+        entities = file_struct.entities;
+    else
+        % no entity-label pair in this file: any condition on an entity fails
+        entities = struct();
+    end
+
+    for iOpt = 1:size(options, 1)
+
+        key = options{iOpt, 1};
+        accepted = options{iOpt, 2};
+
+        if strcmp(key, 'suffix')
+            % suffix is treated separately as it is not one of the entities
+            if ~ismember(file_struct.suffix, accepted)
+                return
+            end
+
+        elseif ~isfield(entities, key) || ~ismember(entities.(key), accepted)
+            return
+
+        end
+
+    end
+
+    status = true;
+
+end
diff --git a/+bids/+internal/match_structure_fields.m b/+bids/+internal/match_structure_fields.m
new file mode 100644
index 00000000..1234f84a
--- /dev/null
+++ b/+bids/+internal/match_structure_fields.m
@@ -0,0 +1,19 @@
+function [s1, s2] = match_structure_fields(s1, s2)
+    %
+    % Adds to each of 2 structures the fields that only exist in the other one
+    %
+    % USAGE::
+    %
+    %   [s1, s2] = bids.internal.match_structure_fields(s1, s2)
+    %
+    % The missing fields are created by ``bids.internal.add_missing_field``.
+    %
+
+    missing_fields = setxor(fieldnames(s1), fieldnames(s2));
+
+    for iField = 1:numel(missing_fields)
+        s1 = bids.internal.add_missing_field(s1, missing_fields{iField});
+        s2 = bids.internal.add_missing_field(s2, missing_fields{iField});
+    end
+
+end
diff --git a/+bids/+internal/parse_filename.m b/+bids/+internal/parse_filename.m index 681a2c8e..ebaba2bf 100644 --- a/+bids/+internal/parse_filename.m +++ b/+bids/+internal/parse_filename.m @@ -1,23 +1,35 @@ function p = parse_filename(filename, fields) + % % Split a filename into its building constituents - % FORMAT p = bids.internal.parse_filename(filename, fields) + % + % USAGE:: + % + % p = bids.internal.parse_filename(filename, fields) + % + % :param filename: filename to parse that follows the pattern + % ``sub-label[_entity-label]*_suffix.extension`` + % :type filename: string + % :param fields: cell of strings of the entities to use for parsing + % :type fields: cell % % Example: % - % >> filename = '../sub-16/anat/sub-16_ses-mri_run-1_echo-2_FLASH.nii.gz'; - % >> bids.internal.parse_filename(filename) + % filename = '../sub-16/anat/sub-16_ses-mri_run-1_acq-hd_T1w.nii.gz'; % - % ans = + % bids.internal.parse_filename(filename) + % + % ans = % % struct with fields: % - % filename: 'sub-16_ses-mri_run-1_echo-2_FLASH.nii.gz' - % type: 'FLASH' - % ext: '.nii.gz' - % sub: '16' - % ses: 'mri' - % run: '1' - % echo: '2' + % 'filename', 'sub-16_ses-mri_run-1_acq-hd_T1w.nii.gz', ... + % 'suffix', 'T1w', ... + % 'ext', '.nii.gz', ... + % 'entities', struct('sub', '16', ... + % 'ses', 'mri', ...
+ % 'run', '1', ... + % 'acq', 'hd'); + % % __________________________________________________________________________ % Copyright (C) 2016-2018, Guillaume Flandin, Wellcome Centre for Human Neuroimaging @@ -26,31 +38,31 @@ filename = bids.internal.file_utils(filename, 'filename'); % -Identify all the BIDS entity-label pairs present in the filename (delimited by "_") - % https://bids-specification.readthedocs.io/en/stable/99-appendices/04-entity-table.html [parts, dummy] = regexp(filename, '(?:_)+', 'split', 'match'); %#ok p.filename = filename; % -Identify the suffix and extension of this file - % https://bids-specification.readthedocs.io/en/stable/02-common-principles.html#file-name-structure - [p.type, p.ext] = strtok(parts{end}, '.'); + [p.suffix, p.ext] = strtok(parts{end}, '.'); % -Separate the entity from the label for each pair identified above for i = 1:numel(parts) - 1 [d, dummy] = regexp(parts{i}, '(?:\-)+', 'split', 'match'); %#ok - p.(d{1}) = d{2}; + p.entities.(d{1}) = d{2}; end % -Extra fields can be added to the structure and ordered specifically. if nargin == 2 for i = 1:numel(fields) - if ~isfield(p, fields{i}) - p.(fields{i}) = ''; - end + p.entities = bids.internal.add_missing_field(p.entities, fields{i}); end try - p = orderfields(p, ['filename', 'ext', 'type', fields]); + p = orderfields(p, {'filename', 'ext', 'suffix', 'entities'}); + p.entities = orderfields(p.entities, fields); catch - warning('Ignoring file ''%s'' not matching template.', filename); + warning('bidsMatlab:noMatchingTemplate', ... 
+ 'Ignoring file %s not matching template.', filename); p = struct([]); end end + +end
diff --git a/+bids/+internal/return_modality_extensions.m b/+bids/+internal/return_modality_extensions.m
new file mode 100644
index 00000000..8a1d4eea
--- /dev/null
+++ b/+bids/+internal/return_modality_extensions.m
@@ -0,0 +1,36 @@
+function extensions = return_modality_extensions(modality)
+    %
+    % Returns the part of a regular expression that matches the extensions
+    % of a suffix group, ``.json`` excepted
+    %
+    % USAGE::
+    %
+    %   extensions = bids.internal.return_modality_extensions(modality)
+    %
+    % :param modality: suffix group with an ``extensions`` field
+    %                  (it can be wrapped in a cell)
+    % :type  modality: structure or cell
+    %
+    % The output has the form ``(ext1|ext2){1}``, or ``(){1}`` when there is
+    % no extension left to list.
+    %
+    % NOTE(review): the extensions are inserted as they are, so their "." matches
+    % any character - confirm whether they should be escaped.
+    %
+
+    % for CI
+    if iscell(modality)
+        modality = modality{1};
+    end
+
+    alternatives = '';
+    for iExt = 1:numel(modality.extensions)
+        if ~strcmp(modality.extensions{iExt}, '.json')
+            alternatives = [alternatives, '|', modality.extensions{iExt}]; %#ok<AGROW>
+        end
+    end
+
+    % alternatives(2:end) drops the leading "|" (and is empty if nothing was listed)
+    extensions = ['(', alternatives(2:end), '){1}'];
+
+end
diff --git a/+bids/+internal/return_modality_regular_expression.m b/+bids/+internal/return_modality_regular_expression.m
new file mode 100644
index 00000000..938e8f72
--- /dev/null
+++ b/+bids/+internal/return_modality_regular_expression.m
@@ -0,0 +1,19 @@
+function regular_expression = return_modality_regular_expression(modality)
+    %
+    % Returns the regular expression template for the files of a suffix group
+    %
+    % USAGE::
+    %
+    %   regular_expression = bids.internal.return_modality_regular_expression(modality)
+    %
+    % The output is ``^%s.*`` followed by the suffixes and extensions expressions of
+    % the suffix group and ``$``. The ``%s`` is left in the output: presumably the
+    % caller replaces it (e.g. with ``sprintf``) - to confirm against the caller.
+    %
+
+    suffixes = bids.internal.return_modality_suffixes(modality);
+    extensions = bids.internal.return_modality_extensions(modality);
+
+    regular_expression = ['^%s.*' suffixes extensions '$'];
+
+end
diff --git a/+bids/+internal/return_modality_suffixes.m b/+bids/+internal/return_modality_suffixes.m
new file mode 100644
index 00000000..10d81750
--- /dev/null
+++ b/+bids/+internal/return_modality_suffixes.m
@@ -0,0 +1,31 @@
+function suffixes = return_modality_suffixes(modality)
+    %
+    % Returns the part of a regular expression that matches the suffixes
+    % of a suffix group
+    %
+    % USAGE::
+    %
+    %   suffixes = bids.internal.return_modality_suffixes(modality)
+    %
+    % :param modality: suffix group with a ``suffixes`` field
+    %                  (it can be wrapped in a cell)
+    % :type  modality: structure or cell
+    %
+    % The output has the form ``_(suffix1|suffix2){1}``, or ``_(){1}`` when
+    % there is no suffix to list.
+    %
+
+    % for CI
+    if iscell(modality)
+        modality = modality{1};
+    end
+
+    alternatives = '';
+    for iSuffix = 1:numel(modality.suffixes)
+        alternatives = [alternatives, '|', modality.suffixes{iSuffix}]; %#ok<AGROW>
+    end
+
+    % alternatives(2:end) drops the leading "|" (and is empty if nothing was listed)
+    suffixes = ['_(', alternatives(2:end), '){1}'];
+
+end
diff --git a/+bids/+schema/load_schema.m b/+bids/+schema/load_schema.m
new file mode 100644
index 00000000..3f656e69
--- /dev/null
+++ b/+bids/+schema/load_schema.m
@@ -0,0 +1,100 @@
+function schema = load_schema(use_schema)
+    %
+    % Loads a json schema by recursively looking through a folder structure.
+    %
+    % USAGE::
+    %
+    %   schema = bids.schema.load_schema(use_schema = true)
+    %
+    % :param use_schema: if ``false`` an empty schema (``[]``) is returned;
+    %                    if ``true`` (default) the schema is read from the ``schema``
+    %                    folder found 2 levels above the folder of this function;
+    %                    if a string, it is the path of the folder to read the schema from.
+    % :type  use_schema: boolean or string
+    %
+    % The nesting of the output structure reflects a combination of the folder structure and
+    % any eventual nesting within each json.
+    %
+    %
+    % Copyright (C) 2021--, BIDS-MATLAB developers
+
+    % TODO:
+    %  - folders that do not contain json files themselves but contain
+    %  subfolders that do, are not reflected in the output structure (they are
+    %  skipped). This can lead to "name conflicts". See "silenced" unit tests
+    %  for more info.
+
+    if nargin < 1
+        use_schema = true();
+    end
+
+    if ischar(use_schema)
+        schema_dir = use_schema;
+
+    elseif ~use_schema
+        schema = [];
+        return
+
+    else
+        schema_dir = fullfile(fileparts(mfilename('fullpath')), '..', '..', 'schema');
+
+    end
+
+    if ~exist(schema_dir, 'dir')
+        error('The schema directory %s does not exist.', schema_dir);
+    end
+
+    [json_file_list, dirs] = list_json_files_and_folders(schema_dir);
+
+    schema = append_json_content_to_structure(struct(), json_file_list);
+
+    schema = inspect_subdir(schema, dirs);
+
+end
+
+function [json_file_list, dirs] = list_json_files_and_folders(directory)
+    % lists the json files and the sub-folders of a directory
+    %
+    % the "." of the extension is escaped: unescaped it matches any character
+    % and files whose name merely ends in "json" would be listed too
+    [json_file_list, dirs] = bids.internal.file_utils('FPList', directory, '^.*\.json$');
+end
+
+function structure = append_json_content_to_structure(structure, json_file_list)
+    % decodes each json file of the list and stores its content in a field of
+    % the structure named after the file (basename)
+
+    for iFile = 1:size(json_file_list, 1)
+
+        file = deblank(json_file_list(iFile, :));
+
+        field_name = bids.internal.file_utils(file, 'basename');
+
+        structure.(field_name) = bids.util.jsondecode(file);
+    end
+
+end
+
+function structure = inspect_subdir(structure, subdir_list)
+    % recursively inspects subdirectory for json files and reflects folder
+    % hierarchy in the output structure.
+    %
+    % NOTE: the content of the sub-folders of a folder is added to ``structure``
+    % itself (next to the field of that folder), not nested in the field of that folder.
+
+    for iDir = 1:size(subdir_list, 1)
+
+        directory = deblank(subdir_list(iDir, :));
+
+        [json_file_list, dirs] = list_json_files_and_folders(directory);
+
+        if ~isempty(json_file_list)
+            field_name = bids.internal.file_utils(directory, 'basename');
+            structure.(field_name) = append_json_content_to_structure(struct(), json_file_list);
+        end
+
+        structure = inspect_subdir(structure, dirs);
+
+    end
+
+end
diff --git a/+bids/+schema/return_modalities.m b/+bids/+schema/return_modalities.m
new file mode 100644
index 00000000..07643c2b
--- /dev/null
+++ b/+bids/+schema/return_modalities.m
@@ -0,0 +1,31 @@
+function modalities = return_modalities(subject, schema, modality_group)
+    %
+    % Returns the modalities to parse for a subject
+    %
+    % USAGE::
+    %
+    %   modalities = bids.schema.return_modalities(subject, schema, modality_group)
+    %
+    % :param subject: structure whose ``path`` field is the folder to list
+    %                 (only used when ``schema`` is empty)
+    % :type  subject: structure
+    % :param schema: BIDS schema, or empty to go schema-less
+    % :type  schema: structure
+    % :param modality_group: name of a field of ``schema.modalities``
+    %                        (only used when ``schema`` is not empty)
+    % :type  modality_group: string
+    %
+
+    if ~isempty(schema)
+        modalities = schema.modalities.(modality_group).datatypes;
+        return
+    end
+
+    % if we go schema-less we list directories in the subject/session folder
+    % as proxy of the modalities that we have to parse
+    modalities = cellstr(bids.internal.file_utils('List', ...
+                                                  subject.path, ...
+                                                  'dir', ...
+                                                  '.*'));
+
+end
diff --git a/+bids/+schema/return_modality_entities.m b/+bids/+schema/return_modality_entities.m
new file mode 100644
index 00000000..d90a9720
--- /dev/null
+++ b/+bids/+schema/return_modality_entities.m
@@ -0,0 +1,34 @@
+function entities = return_modality_entities(suffix_group, schema)
+    %
+    % Returns the ``entity`` value the schema gives for each entity of a suffix group
+    %
+    % USAGE::
+    %
+    %   entities = bids.schema.return_modality_entities(suffix_group, schema)
+    %
+    % :param suffix_group: suffix group with an ``entities`` field
+    %                      (it can be wrapped in a cell)
+    % :type  suffix_group: structure or cell
+    % :param schema: BIDS schema with an ``entities`` field
+    % :type  schema: structure
+    %
+    % For each field name of ``suffix_group.entities``, the output lists the
+    % ``entity`` field of the matching ``schema.entities`` item, as a 1 x n cell
+    % (1 x 0 if the suffix group has no entity).
+    %
+
+    % for CI
+    if iscell(suffix_group)
+        suffix_group = suffix_group{1};
+    end
+
+    entity_names = fieldnames(suffix_group.entities);
+
+    % preallocated so that the output is defined even without any entity
+    entities = cell(1, numel(entity_names));
+
+    for i = 1:numel(entity_names)
+        entities{i} = schema.entities.(entity_names{i}).entity;
+    end
+
+end
diff --git a/+bids/+schema/return_modality_groups.m b/+bids/+schema/return_modality_groups.m new file mode 100644 index 00000000..3651f232 --- /dev/null +++ b/+bids/+schema/return_modality_groups.m @@ -0,0 +1,9 @@ +function modality_groups = return_modality_groups(schema) + + % dummy variable if we go schema less + modality_groups = {nan()}; + if ~isempty(schema) +
modality_groups = fieldnames(schema.modalities); + end + +end diff --git a/+bids/layout.m b/+bids/layout.m index 872fde01..4d3f0fcf 100644 --- a/+bids/layout.m +++ b/+bids/layout.m @@ -1,9 +1,23 @@ -function BIDS = layout(root, tolerant) +function BIDS = layout(root, use_schema) + % % Parse a directory structure formated according to the BIDS standard - % FORMAT BIDS = bids.layout(root) - % root - directory formated according to BIDS [Default: pwd] - % tolerant - if set to 0 (default) only files g - % BIDS - structure containing the BIDS file layout + % + % USAGE:: + % + % BIDS = bids.layout(root = pwd, use_schema = true) + % + % :param root: directory of the dataset formatted according to BIDS [default: ``pwd``] + % :type root: string + % :param use_schema: If set to ``true`` (default), the parsing of the dataset + % will follow the bids-schema provided with bids-matlab. + % If set to ``false`` files just have to be of the form + % ``sub-label_[entity-label]_suffix.ext`` to be parsed. + % If a folder path is provided, then the schema contained + % in that folder will be used for parsing. + % :type use_schema: boolean + % + % + % __________________________________________________________________________ % % BIDS (Brain Imaging Data Structure): https://bids.neuroimaging.io/ @@ -11,7 +25,7 @@ % describing outputs of neuroimaging experiments. % K. J. Gorgolewski et al, Scientific Data, 2016. 
% __________________________________________________________________________ - + % % Copyright (C) 2016-2018, Guillaume Flandin, Wellcome Centre for Human Neuroimaging % Copyright (C) 2018--, BIDS-MATLAB developers @@ -19,31 +33,40 @@ % ========================================================================== if ~nargin root = pwd; + elseif nargin == 1 + if ischar(root) root = bids.internal.file_utils(root, 'CPath'); + elseif isstruct(root) - BIDS = root; % or BIDS = bids.layout(root.root); + BIDS = root; % for bids.query return + else error('Invalid syntax.'); + end + elseif nargin > 2 error('Too many input arguments.'); + + end + + if ~exist('use_schema', 'var') + use_schema = true; end - if ~exist('tolerant', 'var') - tolerant = false; + if ~exist(root, 'dir') + error('BIDS directory does not exist: ''%s''', root); end - % -BIDS structure + % BIDS structure % ========================================================================== % BIDS.dir -- BIDS directory % BIDS.description -- content of dataset_description.json % BIDS.sessions -- cellstr of sessions - % BIDS.scans -- for sub-_scans.tsv (should go within subjects) - % BIDS.sess -- for sub-_sessions.tsv (should go within subjects) % BIDS.participants -- for participants.tsv % BIDS.subjects' -- structure array of subjects @@ -51,716 +74,232 @@ 'dir', root, ... 'description', struct([]), ... 'sessions', {{}}, ... - 'scans', struct([]), ... - 'sess', struct([]), ... 'participants', struct([]), ... 'subjects', struct([])); - % -Validation of BIDS root directory - % ========================================================================== - if ~exist(BIDS.dir, 'dir') - error('BIDS directory does not exist: ''%s''', BIDS.dir); - - elseif ~exist(fullfile(BIDS.dir, 'dataset_description.json'), 'file') - - msg = sprintf('BIDS directory not valid: missing dataset_description.json: ''%s''', ... 
- BIDS.dir); - - tolerant_message(tolerant, msg); - - end - - % -Dataset description - % ========================================================================== - try - BIDS.description = bids.util.jsondecode(fullfile(BIDS.dir, 'dataset_description.json')); - catch err - msg = sprintf('BIDS dataset description could not be read: %s', err.message); - tolerant_message(tolerant, msg); - end - - fields_to_check = {'BIDSVersion', 'Name'}; - for iField = 1:numel(fields_to_check) - - if ~isfield(BIDS.description, fields_to_check{iField}) - msg = sprintf( ... - 'BIDS dataset description not valid: missing %s field.', ... - fields_to_check{iField}); - tolerant_message(tolerant, msg); - end - - end + BIDS = validate_description(BIDS, use_schema); - % -Optional directories + % Optional directories % ========================================================================== - % [code/] + % [code/] - ignore % [derivatives/] - % [stimuli/] - % [sourcedata/] + % [stimuli/] - ingore + % [sourcedata/] - ignore % [phenotype/] - % -Scans key file - % ========================================================================== - - % sub-/[ses-/] - % sub-_scans.tsv + BIDS.participants = []; + BIDS.participants = manage_tsv(BIDS.participants, BIDS.dir, 'participants.tsv'); - % See also optional README and CHANGES files - - % -Participant key file + % Subjects % ========================================================================== - p = bids.internal.file_utils('FPList', BIDS.dir, '^participants\.tsv$'); - if ~isempty(p) - try - BIDS.participants = bids.util.tsvread(p); - catch - msg = ['unable to read ' p]; - tolerant_message(tolerant, msg); - end - end - p = bids.internal.file_utils('FPList', BIDS.dir, '^participants\.json$'); - if ~isempty(p) - BIDS.participants.meta = bids.util.jsondecode(p); + subjects = cellstr(bids.internal.file_utils('List', BIDS.dir, 'dir', '^sub-.*$')); + if isequal(subjects, {''}) + error('No subjects found in BIDS directory.'); end - % -Sessions file - 
% ========================================================================== - - % sub-/[ses-/] - % sub-[_ses-]_sessions.tsv + schema = bids.schema.load_schema(use_schema); - % -Tasks: JSON files are accessed through metadata - % ========================================================================== - % t = bids.internal.file_utils('FPList',BIDS.dir,... - % '^task-.*_(beh|bold|events|channels|physio|stim|meg)\.(json|tsv)$'); - - % -Subjects - % ========================================================================== - sub = cellstr(bids.internal.file_utils('List', BIDS.dir, 'dir', '^sub-.*$')); - if isequal(sub, {''}) - error('No subjects found in BIDS directory.'); - end + for iSub = 1:numel(subjects) + sessions = cellstr(bids.internal.file_utils('List', ... + fullfile(BIDS.dir, subjects{iSub}), ... + 'dir', ... + '^ses-.*$')); - for iSub = 1:numel(sub) - sess = cellstr(bids.internal.file_utils('List', ... - fullfile(BIDS.dir, sub{iSub}), ... - 'dir', ... - '^ses-.*$')); - - for iSess = 1:numel(sess) + for iSess = 1:numel(sessions) if isempty(BIDS.subjects) - BIDS.subjects = parse_subject(BIDS.dir, sub{iSub}, sess{iSess}); + BIDS.subjects = parse_subject(BIDS.dir, subjects{iSub}, sessions{iSess}, schema); else - BIDS.subjects(end + 1) = parse_subject(BIDS.dir, sub{iSub}, sess{iSess}); + new_subject = parse_subject(BIDS.dir, subjects{iSub}, sessions{iSess}, schema); + + [BIDS.subjects, new_subject] = bids.internal.match_structure_fields(BIDS.subjects, ... 
+ new_subject); + % TODO: this can be added to "match_structure_fields" + BIDS.subjects(end + 1) = new_subject; + end end - - end -end - -function tolerant_message(tolerant, msg) - if tolerant - warning(msg); - else - error(msg); end -end -% ========================================================================== -% -Parse a subject's directory -% ========================================================================== -function subject = parse_subject(pth, subjname, sesname) +end +function subject = parse_subject(pth, subjname, sesname, schema) + % + % Parse a subject's directory + % % For each modality (anat, func, eeg...) all the files from the - % corresponding directory are listed and their filenames parsed with extra - % BIDS valid entities listed (e.g. 'acq','ce','rec','fa'...). + % corresponding directory are listed and their filenames parsed with + % BIDS valid entities as listed in the schema (if the schema is not empty). subject.name = subjname; % subject name ('sub-') subject.path = fullfile(pth, subjname, sesname); % full path to subject directory - subject.session = sesname; % session name ('' or 'ses-