From c74d14001c4d520ae252825b10698cfced668f0d Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Mon, 2 Dec 2024 19:03:08 -0500 Subject: [PATCH 01/12] incremental bids importer bids session dataclass bids participants dataclass factorize combination iteration fix layout ignore skip files already inserted wip fix mri path join rebase commit migrate to new database abstraction rewrite optional dataset_description.json return an error on unknown scan types fix wrong counter and memory use --- pyproject.toml | 3 +- python/lib/bidsreader.py | 283 -------- python/lib/candidate.py | 142 ---- python/lib/config.py | 9 + .../nifti_insertion_pipeline.py | 7 +- python/lib/eeg.py | 233 ++----- python/lib/imaging_lib/bids/dataset.py | 360 +++++++++++ .../imaging_lib/bids/dataset_description.py | 54 ++ .../lib/{bids.py => imaging_lib/bids/json.py} | 18 + .../lib/imaging_lib/bids/tsv_participants.py | 123 ++++ python/lib/imaging_lib/bids/tsv_scans.py | 126 ++++ python/lib/imaging_lib/bids/util.py | 20 + python/lib/imaging_lib/file.py | 45 ++ python/lib/imaging_lib/file_parameter.py | 81 +++ python/lib/imaging_lib/mri_scan_type.py | 17 + python/lib/imaging_lib/nifti.py | 7 +- python/lib/imaging_lib/nifti_pic.py | 67 ++ python/lib/import_bids_dataset/args.py | 13 + .../check_subjects_sessions.py | 413 ++++++++++++ python/lib/import_bids_dataset/env.py | 29 + python/lib/import_bids_dataset/events.py | 69 ++ python/lib/import_bids_dataset/imaging.py | 48 ++ python/lib/import_bids_dataset/main.py | 315 +++++++++ python/lib/import_bids_dataset/mri.py | 224 +++++++ python/lib/import_bids_dataset/print.py | 30 + python/lib/mri.py | 455 ------------- python/lib/scanstsv.py | 128 ---- python/lib/session.py | 228 ------- python/scripts/bids_import.py | 610 ------------------ python/scripts/import_bids_dataset.py | 110 ++++ 30 files changed, 2228 insertions(+), 2039 deletions(-) delete mode 100644 python/lib/bidsreader.py create mode 100644 python/lib/imaging_lib/bids/dataset.py create mode 
100644 python/lib/imaging_lib/bids/dataset_description.py rename python/lib/{bids.py => imaging_lib/bids/json.py} (64%) create mode 100644 python/lib/imaging_lib/bids/tsv_participants.py create mode 100644 python/lib/imaging_lib/bids/tsv_scans.py create mode 100644 python/lib/imaging_lib/bids/util.py create mode 100644 python/lib/imaging_lib/file.py create mode 100644 python/lib/imaging_lib/file_parameter.py create mode 100644 python/lib/imaging_lib/mri_scan_type.py create mode 100644 python/lib/imaging_lib/nifti_pic.py create mode 100644 python/lib/import_bids_dataset/args.py create mode 100644 python/lib/import_bids_dataset/check_subjects_sessions.py create mode 100644 python/lib/import_bids_dataset/env.py create mode 100644 python/lib/import_bids_dataset/events.py create mode 100644 python/lib/import_bids_dataset/imaging.py create mode 100644 python/lib/import_bids_dataset/main.py create mode 100644 python/lib/import_bids_dataset/mri.py create mode 100644 python/lib/import_bids_dataset/print.py delete mode 100644 python/lib/mri.py delete mode 100644 python/lib/scanstsv.py delete mode 100644 python/lib/session.py delete mode 100755 python/scripts/bids_import.py create mode 100755 python/scripts/import_bids_dataset.py diff --git a/pyproject.toml b/pyproject.toml index 068ac2b6b..385343434 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,15 +67,16 @@ include = [ "python/tests", "python/lib/db", "python/lib/imaging_lib", + "python/lib/import_bids_dataset", "python/lib/import_dicom_study", "python/lib/util", - "python/lib/bids.py", "python/lib/config.py", "python/lib/config_file.py", "python/lib/env.py", "python/lib/get_session_info.py", "python/lib/logging.py", "python/lib/make_env.py", + "python/scripts/import_bids_dataset.py", "python/scripts/import_dicom_study.py", "python/scripts/summarize_dicom_study.py", ] diff --git a/python/lib/bidsreader.py b/python/lib/bidsreader.py deleted file mode 100644 index bd7da2508..000000000 --- a/python/lib/bidsreader.py 
+++ /dev/null @@ -1,283 +0,0 @@ -"""Reads a BIDS structure into a data dictionary using bids.grabbids.""" - -import json -import re -import sys - -from bids import BIDSLayout - -import lib.exitcode -import lib.utilities as utilities - -# import bids -# BIDSLayoutIndexer is required for PyBIDS >= 0.12.1 -# bids_pack_version = list(map(int, bids.__version__.split('.'))) -# if (bids_pack_version[0] > 0 -# or bids_pack_version[1] > 12 -# or (bids_pack_version[1] == 12 and bids_pack_version[2] > 0)): - -# from bids import BIDSLayoutIndexer - - -class BidsReader: - """ - This class reads a BIDS structure into a data dictionary using BIDS grabbids. - This dictionary will then be used to determine what to register into the - database. - - :Example: - - from lib.bidsreader import BidsReader - - # load the BIDS directory - bids_reader = BidsReader(bids_dir) - """ - - def __init__(self, bids_dir, verbose, validate = True): - """ - Constructor method for the BidsReader class. - - :param bids_dir: path to the BIDS structure to read - :type bids_dir: str - :param verbose : boolean to print verbose information - :type verbose : bool - :param validate : boolean to validate the BIDS dataset - :type validate : bool - """ - - self.verbose = verbose - self.bids_dir = bids_dir - self.bids_layout = self.load_bids_data(validate) - - # load dataset name and BIDS version - self.dataset_name = None - self.bids_version = None - try: - dataset_json = bids_dir + "/dataset_description.json" - dataset_description = {} - with open(dataset_json) as json_file: - dataset_description = json.load(json_file) - self.dataset_name = dataset_description['Name'] - self.bids_version = dataset_description['BIDSVersion'] - except Exception: - print("WARNING: Cannot read dataset_description.json") - - # load BIDS candidates information - self.participants_info = self.load_candidates_from_bids() - - # load BIDS sessions information - self.cand_sessions_list = self.load_sessions_from_bids() - - # load BIDS 
modality information - self.cand_session_modalities_list = self.load_modalities_from_bids() - - def load_bids_data(self, validate): - """ - Loads the BIDS study using the BIDSLayout function (part of the pybids - package) and return the object. - - :return: bids structure - """ - - if self.verbose: - print('Loading the BIDS dataset with BIDS layout library...\n') - - exclude_arr = ['code/', 'sourcedata/', 'log/', '.git'] - force_arr = [re.compile(r"_annotations\.(tsv|json)$")] - - # BIDSLayoutIndexer is required for PyBIDS >= 0.12.1 - # bids_pack_version = list(map(int, bids.__version__.split('.'))) - # disabled until is a workaround for https://github.com/bids-standard/pybids/issues/760 is found - # [file] bids_import.py - # [function] read_and_insert_bids - # [line] for modality in row['modalities']: (row['modalities'] is empty) - # if (bids_pack_version[0] > 0 - # or bids_pack_version[1] > 12 - # or (bids_pack_version[1] == 12 and bids_pack_version[2] > 0)): - # bids_layout = BIDSLayout( - # root=self.bids_dir, - # indexer=BIDSLayoutIndexer(ignore=exclude_arr, force_index=force_arr) - # ) - # else: - bids_layout = BIDSLayout( - root=self.bids_dir, - ignore=exclude_arr, - force_index=force_arr, - derivatives=True, - validate=validate - ) - - if self.verbose: - print('\t=> BIDS dataset loaded with BIDS layout\n') - - return bids_layout - - def load_candidates_from_bids(self): - """ - Loads the list of candidates from the BIDS study. List of - participants and their information will be stored in participants_info. 
- - :return: list of dictionaries with participant information from BIDS - :rtype: list - """ - - if self.verbose: - print('Grepping candidates from the BIDS layout...') - - # grep the participant.tsv file and parse it - participants_info = None - for file in self.bids_layout.get(suffix='participants', return_type='filename'): - # note file[0] returns the path to participants.tsv - if 'participants.tsv' in file: - participants_info = utilities.read_tsv_file(file) - else: - continue - - if participants_info: - self.candidates_list_validation(participants_info) - else: - bids_subjects = self.bids_layout.get_subjects() - participants_info = [{'participant_id': sub_id} for sub_id in bids_subjects] - - if self.verbose: - print('\t=> List of participants found:') - for participant in participants_info: - print('\t\t' + participant['participant_id']) - print('\n') - - return participants_info - - def candidates_list_validation(self, participants_info): - """ - Validates whether the subjects listed in participants.tsv match the - list of participant directory. If there is a mismatch, will exit with - error code from lib.exitcode. 
- """ - - if self.verbose: - print('Validating the list of participants...') - - subjects = self.bids_layout.get_subjects() - - mismatch_message = ("\nERROR: Participant ID mismatch between " - "participants.tsv and raw data found in the BIDS " - "directory") - - # check that all subjects listed in participants_info are also in - # subjects array and vice versa - for row in participants_info: - # remove the "sub-" in front of the subject ID if present - row['participant_id'] = row['participant_id'].replace('sub-', '') - if row['participant_id'] not in subjects: - print(mismatch_message) - print(row['participant_id'] + 'is missing from the BIDS Layout') - print('List of subjects parsed by the BIDS layout: ' + ', '.join(subjects)) - sys.exit(lib.exitcode.BIDS_CANDIDATE_MISMATCH) - # remove the subject from the list of subjects - subjects.remove(row['participant_id']) - - # check that no subjects are left in subjects array - if subjects: - print(mismatch_message) - sys.exit(lib.exitcode.BIDS_CANDIDATE_MISMATCH) - - if self.verbose: - print('\t=> Passed validation of the list of participants\n') - - def load_sessions_from_bids(self): - """ - Grep the list of sessions for each candidate directly from the BIDS - structure. 
- - :return: dictionary with the list of sessions and candidates found in the - BIDS structure - :rtype: dict - """ - - if self.verbose: - print('Grepping list of sessions from the BIDS layout...') - - cand_sessions = {} - - for row in self.participants_info: - ses = self.bids_layout.get_sessions(subject=row['participant_id']) - cand_sessions[row['participant_id']] = ses - - if self.verbose: - print('\t=> List of sessions found:\n') - for candidate in cand_sessions: - if cand_sessions[candidate]: - print('\t\t' + candidate + ': ' + ', '.join(cand_sessions[candidate])) - else: - print('\t\tNo session found for candidate ' + candidate) - print('\n') - - return cand_sessions - - def load_modalities_from_bids(self): - """ - Grep the list of modalities available for each session and candidate directly - from the BIDS structure. - - :return: dictionary for candidate and session with list of modalities - :rtype: dict - """ - - if self.verbose: - print('Grepping the different modalities from the BIDS layout...') - - cand_session_modalities_list = [] - - for subject, visit_list in self.cand_sessions_list.items(): - if visit_list: - for visit in visit_list: - modalities = self.bids_layout.get_datatype(subject=subject, session=visit) - cand_session_modalities_list.append({ - 'bids_sub_id': subject, - 'bids_ses_id': visit, - 'modalities' : modalities - }) - else: - modalities = self.bids_layout.get_datatype(subject=subject) - cand_session_modalities_list.append({ - 'bids_sub_id': subject, - 'bids_ses_id': None, - 'modalities' : modalities - }) - - if self.verbose: - print('\t=> Done grepping the different modalities from the BIDS layout\n') - - return cand_session_modalities_list - - @staticmethod - def grep_file(files_list, match_pattern, derivative_pattern=None): - """ - Grep a unique file based on a match pattern and returns it. 
- - :param files_list : list of files to look into - :type files_list : list - :param match_pattern : pattern to use to find the file - :type match_pattern : str - :param derivative_pattern: derivative pattern to use if the file we look for - is a derivative file - :type derivative_pattern: str - - :return: name of the first file that matches the pattern - :rtype: str - """ - - for filename in files_list: - if not derivative_pattern: - if 'derivatives' in filename: - # skip all files with 'derivatives' string in their path - continue - elif re.search(match_pattern, filename): - # grep the file that matches the match_pattern (extension) - return filename - else: - matches_derivative = re.search(derivative_pattern, filename) - if re.search(match_pattern, filename) and matches_derivative: - return filename - - return None diff --git a/python/lib/candidate.py b/python/lib/candidate.py index bbb6981c6..8f617f466 100644 --- a/python/lib/candidate.py +++ b/python/lib/candidate.py @@ -1,11 +1,6 @@ """This class gather functions for candidate handling.""" import random -import sys - -from dateutil.parser import parse - -import lib.exitcode class Candidate: @@ -57,127 +52,6 @@ def __init__(self, verbose, psc_id=None, cand_id=None, sex=None, dob=None): self.center_id = None self.project_id = None - def create_candidate(self, db, participants_info): - """ - Creates a candidate using BIDS information provided in the - participants_info's list. 
- - :param db : database handler object - :type db : object - :param participants_info: list of dictionary with participants - information from BIDS - :type participants_info: list - - :return: dictionary with candidate info from the candidate's table - :rtype: dict - """ - - if not self.psc_id: - print("Cannot create a candidate without a PSCID.\n") - sys.exit(lib.exitcode.CANDIDATE_CREATION_FAILURE) - - if not self.cand_id: - self.cand_id = self.generate_cand_id(db) - - for row in participants_info: - if not row['participant_id'] == self.psc_id: - continue - self.grep_bids_dob(row) - if 'sex' in row: - self.map_sex(row['sex']) - if 'age' in row: - self.age = row['age'] - - # three steps to find site: - # 1. try matching full name from 'site' column in participants.tsv in db - # 2. try extracting alias from pscid - # 3. try finding previous site in candidate table - - if 'site' in row and row['site'].lower() not in ("null", ""): - # search site id in psc table by its full name - site_info = db.pselect( - "SELECT CenterID FROM psc WHERE Name = %s", - [row['site'], ] - ) - if len(site_info) > 0: - self.center_id = site_info[0]['CenterID'] - - if self.center_id is None: - # search site id in psc table by its alias extracted from pscid - db_sites = db.pselect("SELECT CenterID, Alias FROM psc") - for site in db_sites: - if site['Alias'] in row['participant_id']: - self.center_id = site['CenterID'] - - if self.center_id is None: - # try to find participant site in db - candidate_site_project = db.pselect( - "SELECT RegistrationCenterID FROM candidate WHERE pscid = %s", - [self.psc_id, ] - ) - if len(candidate_site_project) > 0: - self.center_id = candidate_site_project[0]['RegistrationCenterID'] - - # two steps to find project: - # 1. find full name in 'project' column in participants.tsv - # 2. 
find previous in candidate table - - if 'project' in row and row['project'].lower() not in ("null", ""): - # search project id in Project table by its full name - project_info = db.pselect( - "SELECT ProjectID FROM Project WHERE Name = %s", - [row['project'], ] - ) - if len(project_info) > 0: - self.project_id = project_info[0]['ProjectID'] - - if self.project_id is None: - # try to find participant project - candidate_site_project = db.pselect( - "SELECT RegistrationProjectID FROM candidate WHERE pscid = %s", - [self.psc_id, ] - ) - if len(candidate_site_project) > 0: - self.center_id = candidate_site_project[0]['RegistrationProjectID'] - - if not self.center_id: - print("ERROR: could not determine site for " + self.psc_id + "." - + " Please check that your psc table contains a site with an" - + " alias matching the BIDS participant_id or a name matching the site mentioned in" - + " participants.tsv's site column") - sys.exit(lib.exitcode.PROJECT_CUSTOMIZATION_FAILURE) - - if not self.project_id: - print("ERROR: could not determine project for " + self.psc_id + "." 
- + " Please check that your project table contains a project with a" - + " name matching the participants.tsv's project column") - sys.exit(lib.exitcode.PROJECT_CUSTOMIZATION_FAILURE) - - if self.verbose: - print("Creating candidate with \n" - + "PSCID = " + self.psc_id + ",\n" - + "CandID = " + str(self.cand_id) + ",\n" - + "CenterID = " + str(self.center_id) + ",\n" - + "ProjectID = " + str(self.project_id)) - - insert_col = ('PSCID', 'CandID', 'RegistrationCenterID', 'RegistrationProjectID') - insert_val = (self.psc_id, str(self.cand_id), str(self.center_id), str(self.project_id)) - - if self.sex: - insert_col = (*insert_col, 'Sex') - insert_val = (*insert_val, self.sex) - if self.dob: - insert_col = (*insert_col, 'DoB') - insert_val = (*insert_val, self.dob) - - db.insert( - table_name='candidate', - column_names=insert_col, - values=insert_val - ) - - return self.get_candidate_info_from_loris(db) - def get_candidate_info_from_loris(self, db): """ Grep candidate information from the candidate table using the PSCID or CandID. 
@@ -218,22 +92,6 @@ def map_sex(self, sex): if sex.lower() in ('f', 'female'): self.sex = 'Female' - def grep_bids_dob(self, subject_info): - """ - Greps the date of birth from the BIDS structure and add it to self.dob which - will be inserted into the DoB field of the candidate table - - :param subject_info: dictionary with all information present in the BIDS - participants.tsv file for a given candidate - :type subject_info: dict - """ - - dob_names = ['date_of_birth', 'birth_date', 'dob'] - for name in dob_names: - if name in subject_info: - dob = parse(subject_info[name]) - self.dob = dob.strftime('%Y-%m-%d') - @staticmethod def generate_cand_id(db): """ diff --git a/python/lib/config.py b/python/lib/config.py index e011164bc..e658638e8 100644 --- a/python/lib/config.py +++ b/python/lib/config.py @@ -26,6 +26,15 @@ def get_patient_id_dicom_header_config(env: Env) -> Literal['PatientID', 'Patien return patient_id_dicom_header +def get_default_bids_visit_label_config(env: Env) -> str: + """ + Get the default BIDS visit label from the in-database configuration, or exit the program with + an error if that configuration value does not exist. 
+ """ + + return _get_config_value(env, 'default_bids_vl') + + def get_data_dir_path_config(env: Env) -> str: """ Get the LORIS base data directory path from the in-database configuration, or exit the program diff --git a/python/lib/dcm2bids_imaging_pipeline_lib/nifti_insertion_pipeline.py b/python/lib/dcm2bids_imaging_pipeline_lib/nifti_insertion_pipeline.py index 5a241623f..80c58e7ec 100644 --- a/python/lib/dcm2bids_imaging_pipeline_lib/nifti_insertion_pipeline.py +++ b/python/lib/dcm2bids_imaging_pipeline_lib/nifti_insertion_pipeline.py @@ -7,11 +7,11 @@ import sys import lib.exitcode -from lib.bids import get_bids_json_session_info from lib.db.queries.dicom_archive import try_get_dicom_archive_series_with_series_uid_echo_time from lib.dcm2bids_imaging_pipeline_lib.base_pipeline import BasePipeline from lib.get_session_info import SessionConfigError, get_dicom_archive_session_info -from lib.imaging_lib.nifti import add_nifti_spatial_file_parameters +from lib.imaging_lib.bids.json import get_bids_json_session_info +from lib.imaging_lib.nifti import add_nifti_file_parameters from lib.logging import log_error_exit, log_verbose from lib.util.crypto import compute_file_blake2b_hash, compute_file_md5_hash @@ -74,7 +74,7 @@ def __init__(self, loris_getopt_obj, script_name): # Load the JSON file object with scan parameters if a JSON file was provided # --------------------------------------------------------------------------------------------- self.json_file_dict = self._load_json_sidecar_file() - add_nifti_spatial_file_parameters(self.nifti_path, self.json_file_dict) + add_nifti_file_parameters(self.nifti_path, self.nifti_blake2, self.json_file_dict) # --------------------------------------------------------------------------------- # Determine subject IDs based on DICOM headers and validate the IDs against the DB @@ -560,7 +560,6 @@ def _create_destination_dir_and_move_image_files(self, destination): self.move_file(original_file_path, new_file_path) if destination == 
'assembly_bids': - self.json_file_dict['file_blake2b_hash'] = self.nifti_blake2 if self.json_path: self.json_file_dict['bids_json_file'] = json_rel_path self.json_file_dict['bids_json_file_blake2b_hash'] = self.json_blake2 diff --git a/python/lib/eeg.py b/python/lib/eeg.py index 5c08b05e1..f52b87f92 100644 --- a/python/lib/eeg.py +++ b/python/lib/eeg.py @@ -3,19 +3,19 @@ import getpass import json import os -import sys +from typing import Any, Literal import lib.exitcode import lib.utilities as utilities -from lib.candidate import Candidate +from lib.database import Database from lib.database_lib.config import Config from lib.database_lib.physiological_event_archive import PhysiologicalEventArchive from lib.database_lib.physiological_event_file import PhysiologicalEventFile from lib.database_lib.physiological_modality import PhysiologicalModality from lib.database_lib.physiological_output_type import PhysiologicalOutputType +from lib.db.models.session import DbSession +from lib.imaging_lib.bids.dataset import BidsDataType from lib.physiological import Physiological -from lib.scanstsv import ScansTSV -from lib.session import Session from lib.util.crypto import compute_file_blake2b_hash @@ -23,97 +23,33 @@ class Eeg: """ This class reads the BIDS EEG data structure and register the EEG datasets into the database by calling the lib.physiological class. 
- - :Example: - - from lib.bidsreader import BidsReader - from lib.eeg import Eeg - from lib.database import Database - from lib.database_lib.config import Config - - # database connection - db = Database(config_file.mysql, verbose) - db.connect() - - # grep config settings from the Config module - config_obj = Config(db, verbose) - default_bids_vl = config_obj.get_config('default_bids_vl') - data_dir = config_obj.get_config('dataDirBasepath') - - # load the BIDS directory - bids_reader = BidsReader(bids_dir) - - # create the LORIS_BIDS directory in data_dir based on Name and BIDS version - loris_bids_root_dir = create_loris_bids_directory( - bids_reader, data_dir, verbose - ) - for row in bids_reader.cand_session_modalities_list: - for modality in row['modalities']: - if modality == 'eeg': - bids_session = row['bids_ses_id'] - visit_label = bids_session if bids_session else default_bids_vl - loris_bids_eeg_rel_dir = "sub-" + row['bids_sub_id'] + "/" + \ - "ses-" + visit_label + "/eeg/" - lib.utilities.create_dir( - loris_bids_root_dir + loris_bids_eeg_rel_dir, verbose - ) - Eeg( - bids_reader = bids_reader, - bids_sub_id = row['bids_sub_id'], - bids_ses_id = row['bids_ses_id'], - bids_modality = modality, - db = db, - verbose = verbose, - data_dir = data_dir, - default_visit_label = default_bids_vl, - loris_bids_eeg_rel_dir = loris_bids_eeg_rel_dir, - loris_bids_root_dir = loris_bids_root_dir, - dataset_tag_dict = dataset_tag_dict - ) - - # disconnect from the database - db.disconnect() """ - def __init__(self, bids_reader, bids_sub_id, bids_ses_id, bids_modality, db, - verbose, data_dir, default_visit_label, loris_bids_eeg_rel_dir, - loris_bids_root_dir, dataset_tag_dict, dataset_type): + def __init__( + self, data_type: BidsDataType, session: DbSession, db: Database, verbose: bool, data_dir: str, + loris_bids_eeg_rel_dir: str, loris_bids_root_dir: str | None, dataset_tag_dict: dict[Any, Any], + dataset_type: Literal['raw', 'derivative'] | None, + ): """ 
Constructor method for the Eeg class. - :param bids_reader : dictionary with BIDS reader information - :type bids_reader : dict - :param bids_sub_id : BIDS subject ID (that will be used as PSCID) - :type bids_sub_id : str - :param bids_ses_id : BIDS session ID (that will be used for the visit label) - :type bids_ses_id : str - :param bids_modality: BIDS modality (a.k.a. EEG) - :tyoe bids_modality: str + :param data_type : The BIDS data type object. + :param session : The session database object. :param db : Database class object - :type db : object :param verbose : whether to be verbose - :type verbose : bool :param data_dir : LORIS data directory path (usually /data/PROJECT/data) - :type data_dir : str - :param default_visit_label : default visit label to be used if no BIDS - session are present in the BIDS structure - :type default_visit_label : str :param loris_bids_eeg_rel_dir: LORIS BIDS EEG relative dir path to data_dir - :type loris_bids_eeg_rel_dir: str :param loris_bids_root_dir : LORIS BIDS root directory path - :type loris_bids_root_dir : str :param dataset_tag_dict : Dict of dataset-inherited HED tags - :type dataset_tag_dict : dict :param dataset_type : raw | derivative. Type of the dataset - :type dataset_type : string """ # config self.config_db_obj = Config(db, verbose) # load bids objects - self.bids_reader = bids_reader - self.bids_layout = bids_reader.bids_layout + self.data_type = data_type + self.bids_layout = data_type.root_dataset.layout # load the LORIS BIDS import root directory where the eeg files will # be copied @@ -121,11 +57,6 @@ def __init__(self, bids_reader, bids_sub_id, bids_ses_id, bids_modality, db, self.loris_bids_root_dir = loris_bids_root_dir self.data_dir = data_dir - # load bids subject, visit and modality - self.bids_sub_id = bids_sub_id - self.bids_ses_id = bids_ses_id - self.bids_modality = bids_modality - # load dataset tag dict. 
Used to ensure HED tags aren't duplicated self.dataset_tag_dict = dataset_tag_dict @@ -134,35 +65,19 @@ def __init__(self, bids_reader, bids_sub_id, bids_ses_id, bids_modality, db, self.verbose = verbose # find corresponding CandID and SessionID in LORIS - self.loris_cand_info = self.get_loris_cand_info() - self.default_vl = default_visit_label - self.psc_id = self.loris_cand_info['PSCID'] - self.cand_id = self.loris_cand_info['CandID'] - self.center_id = self.loris_cand_info['RegistrationCenterID'] - self.project_id = self.loris_cand_info['RegistrationProjectID'] + self.session = session hed_query = 'SELECT * FROM hed_schema_nodes WHERE 1' self.hed_union = self.db.pselect(query=hed_query, args=()) - self.cohort_id = None - for row in bids_reader.participants_info: - if not row['participant_id'] == self.bids_sub_id: - continue - if 'cohort' in row: - cohort_info = db.pselect( - "SELECT CohortID FROM cohort WHERE title = %s", - [row['cohort'], ] - ) - if len(cohort_info) > 0: - self.cohort_id = cohort_info[0]['CohortID'] - break - - self.session_id = self.get_loris_session_id() - # check if a tsv with acquisition dates or age is available for the subject self.scans_file = None - if self.bids_layout.get(suffix='scans', subject=self.bids_sub_id, return_type='filename'): - self.scans_file = self.bids_layout.get(suffix='scans', subject=self.bids_sub_id, return_type='filename')[0] + if self.bids_layout.get(suffix='scans', subject=self.data_type.subject.label, return_type='filename'): + self.scans_file = self.bids_layout.get( + suffix='scans', + subject=self.data_type.subject.label, + return_type='filename' + )[0] # register the data into LORIS if (dataset_type and dataset_type == 'raw'): @@ -173,59 +88,6 @@ def __init__(self, bids_reader, bids_sub_id, bids_ses_id, bids_modality, db, self.register_data() self.register_data(derivatives=True) - def get_loris_cand_info(self): - """ - Gets the LORIS Candidate info for the BIDS subject. 
- - :return: Candidate info of the subject found in the database - :rtype: list - """ - - candidate = Candidate(verbose=self.verbose, cand_id=self.bids_sub_id) - loris_cand_info = candidate.get_candidate_info_from_loris(self.db) - - if not loris_cand_info: - candidate = Candidate(verbose=self.verbose, psc_id=self.bids_sub_id) - loris_cand_info = candidate.get_candidate_info_from_loris(self.db) - - if not loris_cand_info: - print("Candidate " + self.bids_sub_id + " not found. You can retry with the --createcandidate option.\n") - sys.exit(lib.exitcode.CANDIDATE_NOT_FOUND) - - return loris_cand_info - - def get_loris_session_id(self): - """ - Greps the LORIS session.ID corresponding to the BIDS visit. Note, - if no BIDS visit are set, will use the default visit label value set - in the config module - - :return: the session's ID in LORIS - :rtype: int - """ - - # check if there are any visit label in BIDS structure, if not, - # will use the default visit label set in the config module - visit_label = self.bids_ses_id if self.bids_ses_id else self.default_vl - - session = Session( - self.db, self.verbose, self.cand_id, visit_label, - self.center_id, self.project_id, self.cohort_id - ) - loris_vl_info = session.get_session_info_from_loris() - - if not loris_vl_info: - message = "ERROR: visit label " + visit_label + " does not exist in " + \ - "the session table for candidate " + str(self.cand_id) + \ - "\nPlease make sure the visit label is created in the " + \ - "database or run bids_import.py with the -s option -s if " + \ - "you wish that the insertion pipeline creates the visit " + \ - "label in the session table." 
- print(message) - exit(lib.exitcode.SELECT_FAILURE) - - return loris_vl_info['ID'] - def grep_bids_files(self, bids_type): """ Greps the BIDS files and their layout information from the BIDSLayout @@ -239,18 +101,18 @@ def grep_bids_files(self, bids_type): :rtype: list """ - if self.bids_ses_id: + if self.data_type.session.label: return self.bids_layout.get( - subject = self.bids_sub_id, - session = self.bids_ses_id, - datatype = self.bids_modality, + subject = self.data_type.subject.label, + session = self.data_type.session.label, + datatype = self.data_type.name, suffix = bids_type, return_type = 'filename' ) else: return self.bids_layout.get( - subject = self.bids_sub_id, - datatype = self.bids_modality, + subject = self.data_type.subject.label, + datatype = self.data_type.name, suffix = bids_type, return_type = 'filename' ) @@ -371,17 +233,17 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True): if detect: # TODO if derivatives, grep the source file as well as the input file ID??? 
eeg_files = self.bids_layout.get( - subject = self.bids_sub_id, - session = self.bids_ses_id, + subject = self.data_type.subject.label, + session = self.data_type.session.label, scope = 'derivatives' if derivatives else 'raw', - suffix = self.bids_modality, + suffix = self.data_type.name, extension = ['set', 'edf', 'vhdr', 'vmrk', 'eeg', 'bdf'] ) else: eeg_files = self.bids_layout.get( - subject = self.bids_sub_id, - session = self.bids_ses_id, - suffix = self.bids_modality, + subject = self.data_type.subject.label, + session = self.data_type.session.label, + suffix = self.data_type.name, extension = ['set', 'edf', 'vhdr', 'vmrk', 'eeg', 'bdf'] ) @@ -395,7 +257,7 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True): return_type = 'tuple', strict=False, extension = 'json', - suffix = self.bids_modality, + suffix = self.data_type.name, all_ = False, full_search = False, ) @@ -438,15 +300,14 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True): # get the acquisition date of the EEG file or the age at the time of the EEG recording eeg_acq_time = None if self.scans_file: - scan_info = ScansTSV(self.scans_file, eeg_file.path, self.verbose) - eeg_acq_time = scan_info.get_acquisition_time() - eeg_file_data['age_at_scan'] = scan_info.get_age_at_scan() + tsv_scan = self.data_type.session.get_tsv_scan(os.path.basename(self.scans_file)) + + eeg_acq_time = tsv_scan.acquisition_time + eeg_file_data['age_at_scan'] = tsv_scan.age_at_scan if self.loris_bids_root_dir: # copy the scans.tsv file to the LORIS BIDS import directory - scans_path = scan_info.copy_scans_tsv_file_to_loris_bids_dir( - self.bids_sub_id, self.loris_bids_root_dir, self.data_dir - ) + scans_path = self.copy_scans_tsv_file_to_loris_bids_dir() eeg_file_data['scans_tsv_file'] = scans_path scans_blake2 = compute_file_blake2b_hash(self.scans_file) @@ -481,7 +342,7 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True): if not physio_file_id: # grep the modality 
ID from physiological_modality table - modality_id = physiological_modality.grep_id_from_modality_value(self.bids_modality) + modality_id = physiological_modality.grep_id_from_modality_value(self.data_type.name) eeg_path = eeg_file.path.replace(self.data_dir, '') if self.loris_bids_root_dir: @@ -495,7 +356,7 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True): eeg_file_info = { 'FileType': file_type, 'FilePath': eeg_path, - 'SessionID': self.session_id, + 'SessionID': self.session.id, 'AcquisitionTime': eeg_acq_time, 'InsertedByUser': getpass.getuser(), 'PhysiologicalOutputTypeID': output_type_id, @@ -601,7 +462,7 @@ def fetch_and_insert_electrode_file( suffix = 'coordsystem', all_ = False, full_search = False, - subject=self.bids_sub_id, + subject=self.data_type.subject.label, ) if not coordsystem_metadata_file: message = '\nWARNING: no electrode metadata files (coordsystem.json) ' \ @@ -762,7 +623,7 @@ def fetch_and_insert_event_files( suffix = 'events', all_ = False, full_search = False, - subject=self.bids_sub_id, + subject=self.data_type.subject.label, ) inheritance = False @@ -787,7 +648,7 @@ def fetch_and_insert_event_files( event_metadata=event_metadata, event_metadata_file=event_metadata_path, physiological_file_id=physiological_file_id, - project_id=self.project_id, + project_id=self.session.project_id, blake2=blake2, project_wide=False, hed_union=self.hed_union @@ -810,7 +671,7 @@ def fetch_and_insert_event_files( event_data=event_data, event_file=event_path, physiological_file_id=physiological_file_id, - project_id=self.project_id, + project_id=self.session.project_id, blake2=blake2, dataset_tag_dict=self.dataset_tag_dict, file_tag_dict=file_tag_dict, @@ -857,15 +718,15 @@ def copy_file_to_loris_bids_dir(self, file, derivatives=False, inheritance=False copy_file = "" if not inheritance: copy_file = self.loris_bids_eeg_rel_dir - if self.bids_ses_id: + if self.data_type.session.label: copy_file += os.path.basename(file) else: # make 
sure the ses- is included in the new filename if using # default visit label from the LORIS config copy_file += str.replace( os.path.basename(file), - "sub-" + self.bids_sub_id, - "sub-" + self.bids_sub_id + "_ses-" + self.default_vl + "sub-" + self.data_type.subject.label, + "sub-" + self.data_type.subject.label + "_ses-" + self.default_vl ) copy_file = self.loris_bids_root_dir + copy_file diff --git a/python/lib/imaging_lib/bids/dataset.py b/python/lib/imaging_lib/bids/dataset.py new file mode 100644 index 000000000..37664ed1f --- /dev/null +++ b/python/lib/imaging_lib/bids/dataset.py @@ -0,0 +1,360 @@ +import os +import re +from collections.abc import Iterator +from functools import cached_property + +from bids import BIDSLayout + +from lib.imaging_lib.bids.dataset_description import BidsDatasetDescription +from lib.imaging_lib.bids.tsv_participants import BidsTsvParticipant, read_bids_participants_tsv_file +from lib.imaging_lib.bids.tsv_scans import BidsTsvScan, read_bids_scans_tsv_file +from lib.imaging_lib.nifti import find_dir_nifti_names +from lib.util.fs import replace_file_extension, search_dir_file_with_regex +from lib.util.iter import find + +PYBIDS_IGNORE = ['code', 'sourcedata', 'log', '.git'] + +PYBIDS_FORCE = [re.compile(r"_annotations\.(tsv|json)$")] + + +class BidsDataset: + path: str + validate: bool + + def __init__(self, bids_path: str, validate: bool): + self.path = bids_path + self.validate = validate + + @property + def sessions(self) -> Iterator['BidsSession']: + for subject in self.subjects: + yield from subject.sessions + + @property + def data_types(self) -> Iterator['BidsDataType']: + for session in self.sessions: + yield from session.data_types + + @property + def niftis(self) -> Iterator['BidsNifti']: + for data_type in self.data_types: + yield from data_type.niftis + + @cached_property + def subjects(self) -> list['BidsSubject']: + """ + The subject directories found in the BIDS dataset. 
+ """ + + subjects: list[BidsSubject] = [] + + for file in os.scandir(self.path): + subject_match = re.match(r'sub-([a-zA-Z0-9]+)', file.name) + if subject_match is None: + continue + + if not os.path.isdir(file): + continue + + subject_label = subject_match.group(1) + subjects.append(BidsSubject(self, subject_label)) + + return subjects + + def get_dataset_description(self) -> 'BidsDatasetDescription | None': + """ + Read the BIDS dataset description file of this BIDS dataset. Return `None` if no dataset + description file is present in the dataset, or raise an exeption if the file is present but + does contains incorrect data. + """ + + dataset_description_path = os.path.join(self.path, 'dataset_description.json') + if not os.path.exists(dataset_description_path): + return None + + return BidsDatasetDescription(dataset_description_path) + + @cached_property + def tsv_participants(self) -> dict[str, BidsTsvParticipant] | None: + """ + The set of participants in the 'participants.tsv' file of this BIDS dataset if it is + present. This property might raise an exception if the file is present but incorrect. + """ + + tsv_participants_path = os.path.join(self.path, 'participants.tsv') + if not os.path.exists(tsv_participants_path): + return None + + return read_bids_participants_tsv_file(tsv_participants_path) + + @cached_property + def subject_labels(self) -> list[str]: + """ + All the subject labels found in the BIDS dataset. + """ + + subject_labels = list(set(subject.label for subject in self.subjects)) + subject_labels.sort() + return subject_labels + + @cached_property + def session_labels(self) -> list[str]: + """ + All the session labels found in this BIDS dataset. 
+ """ + + session_labels = list(set(session.label for session in self.sessions if session.label is not None)) + session_labels.sort() + return session_labels + + def get_subject(self, subject_label: str) -> 'BidsSubject | None': + """ + Get the subject directory corresponding to a subject label in this BIDS dataset or `None` + if it does not exist. + """ + + return find(lambda subject: subject.label == subject_label, self.subjects) + + def get_tsv_participant(self, participant_id: str) -> 'BidsTsvParticipant | None': + """ + Get the `participants.tsv` record corresponding to a participant ID in this BIDS dataset + or `None` if it does not exist. + """ + + if self.tsv_participants is None: + return None + + return self.tsv_participants.get(participant_id) + + @cached_property + def layout(self) -> BIDSLayout: + """ + Get the PyBIDS BIDSLayout for the BIDS dataset. + """ + + return BIDSLayout( + root = self.path, + ignore = PYBIDS_IGNORE, + force_index = PYBIDS_FORCE, + derivatives = True, + validate = self.validate + ) + + +class BidsSubject: + root_dataset: BidsDataset + label: str + path: str + + def __init__(self, root_dataset: BidsDataset, label: str): + self.root_dataset = root_dataset + self.label = label + self.path = os.path.join(self.root_dataset.path, f'sub-{self.label}') + + @property + def data_types(self) -> Iterator['BidsDataType']: + for session in self.sessions: + yield from session.data_types + + @property + def niftis(self) -> Iterator['BidsNifti']: + for data_type in self.data_types: + yield from data_type.niftis + + @cached_property + def sessions(self) -> list['BidsSession']: + """ + The session directories found in this subject directory. 
+ """ + + sessions: list[BidsSession] = [] + + for file in os.scandir(self.path): + if not os.path.isdir(file): + continue + + session_match = re.match(r'ses-([a-zA-Z0-9]+)', file.name) + if session_match is None: + continue + + session_label = session_match.group(1) + sessions.append(BidsSession(self, session_label)) + + if sessions == []: + sessions.append(BidsSession(self, None)) + + return sessions + + def get_session(self, session_label: str) -> 'BidsSession | None': + """ + Get a session directory of this subject directory or `None` if it does not exist. + """ + + return find(lambda session: session.label == session_label, self.sessions) + + +class BidsSession: + subject: BidsSubject + label: str | None + path: str + tsv_scans_path: str | None + + def __init__(self, subject: BidsSubject, label: str | None): + self.subject = subject + self.label = label + if label is None: + self.path = self.subject.path + else: + self.path = os.path.join(self.subject.path, f'ses-{self.label}') + + tsv_scans_name = search_dir_file_with_regex(self.path, r'scans.tsv$') + if tsv_scans_name is not None: + self.tsv_scans_path = os.path.join(self.path, tsv_scans_name) + else: + self.tsv_scans_path = None + + @property + def root_dataset(self) -> BidsDataset: + return self.subject.root_dataset + + @property + def niftis(self) -> Iterator['BidsNifti']: + for data_type in self.data_types: + yield from data_type.niftis + + @cached_property + def data_types(self) -> list['BidsDataType']: + """ + The data type directories found in this session directory. + """ + + data_types: list[BidsDataType] = [] + + for file in os.scandir(self.path): + if not os.path.isdir(file): + continue + + data_types.append(BidsDataType(self, file.name)) + + return data_types + + @cached_property + def tsv_scans(self) -> dict[str, BidsTsvScan] | None: + """ + The set of scans in the 'scans.tsv' file of this BIDS directory if it is present. 
This + property might raise an exception if the file is present but incorrect. + """ + + if self.tsv_scans_path is None: + return None + + return read_bids_scans_tsv_file(self.tsv_scans_path) + + def get_tsv_scan(self, file_name: str) -> 'BidsTsvScan | None': + """ + Get the `scans.tsv` record corresponding to a file name of this session directory or `None` + if it does not exist. + """ + + if self.tsv_scans is None: + return None + + return self.tsv_scans.get(file_name) + + +class BidsDataType: + session: BidsSession + name: str + path: str + + def __init__(self, session: BidsSession, name: str): + self.session = session + self.name = name + self.path = os.path.join(self.session.path, self.name) + + @property + def root_dataset(self) -> BidsDataset: + return self.session.root_dataset + + @property + def subject(self) -> BidsSubject: + return self.session.subject + + @cached_property + def niftis(self) -> list['BidsNifti']: + """ + The NIfTI files found in this data type directory. + """ + + niftis: list[BidsNifti] = [] + + for nifti_name in find_dir_nifti_names(self.path): + niftis.append(BidsNifti(self, nifti_name)) + + return niftis + + +class BidsNifti: + data_type: BidsDataType + name: str + path: str + suffix: str | None + + def __init__(self, data_type: BidsDataType, name: str): + self.data_type = data_type + self.path = os.path.join(self.data_type.path, name) + self.name = name + + suffix_match = re.search(r'_([a-zA-Z0-9]+)\.nii(\.gz)?$', self.name) + if suffix_match is not None: + self.suffix = suffix_match.group(1) + else: + self.suffix = None + + @property + def root_dataset(self) -> BidsDataset: + return self.data_type.root_dataset + + @property + def subject(self) -> BidsSubject: + return self.data_type.subject + + @property + def session(self) -> BidsSession: + return self.data_type.session + + def get_json_path(self) -> str | None: + """ + Get the JSON sidecar file path of this NIfTI file if it exists. 
+ """ + + json_name = replace_file_extension(self.name, 'json') + json_path = os.path.join(self.data_type.path, json_name) + if not os.path.exists(json_path): + return None + + return json_path + + def get_bval_path(self) -> str | None: + """ + Get the BVAL file path of this NIfTI file if it exists. + """ + + bval_name = replace_file_extension(self.name, 'bval') + bval_path = os.path.join(self.data_type.path, bval_name) + if not os.path.exists(bval_path): + return None + + return bval_path + + def get_bvec_path(self) -> str | None: + """ + Get the BVEC file path of this NIfTI file if it exists. + """ + + bvec_name = replace_file_extension(self.name, 'bvec') + bvec_path = os.path.join(self.data_type.path, bvec_name) + if not os.path.exists(bvec_path): + return None + + return bvec_path diff --git a/python/lib/imaging_lib/bids/dataset_description.py b/python/lib/imaging_lib/bids/dataset_description.py new file mode 100644 index 000000000..15c1b8a0b --- /dev/null +++ b/python/lib/imaging_lib/bids/dataset_description.py @@ -0,0 +1,54 @@ +import json +from typing import Any + + +class BidsDatasetDescriptionError(ValueError): + """ + Error raised when reading an incorrect BIDS dataset description file. + """ + + def __init__(self, message: str): + super().__init__(message) + + +class BidsDatasetDescription: + """ + Information about the contents of a BIDS dataset description file. + """ + + name: str + """ + The BIDS dataset name. + """ + + bids_version: str + """ + The BIDS dataset BIDS version. + """ + + json: dict[str, Any] + """ + The BIDS dataset description JSON data. + """ + + def __init__(self, dataset_descrption_path: str): + """ + Read a BIDS dataset description file, or raise an exception if that file contains incorrect + data. 
+ """ + + with open(dataset_descrption_path) as dataset_description_file: + try: + self.json = json.load(dataset_description_file) + except ValueError: + raise BidsDatasetDescriptionError("The BIDS dataset description file does not contain valid JSON.") + + try: + self.name = self.json["Name"] + except ValueError: + raise BidsDatasetDescriptionError("Missing property 'Name' in the BIDS dataset description file.") + + try: + self.bids_version = self.json["BIDSVersion"] + except ValueError: + raise BidsDatasetDescriptionError("Missing property 'BIDSVersion' in the BIDS dataset description file.") diff --git a/python/lib/bids.py b/python/lib/imaging_lib/bids/json.py similarity index 64% rename from python/lib/bids.py rename to python/lib/imaging_lib/bids/json.py index fe616d42d..3ecfa9184 100644 --- a/python/lib/bids.py +++ b/python/lib/imaging_lib/bids/json.py @@ -1,9 +1,12 @@ +import json from typing import Any from lib.config import get_patient_id_dicom_header_config from lib.env import Env from lib.get_session_info import SessionInfo, get_session_info from lib.imaging_lib.mri_scanner import MriScannerInfo +from lib.import_bids_dataset.imaging import map_bids_param_to_loris_param +from lib.util.crypto import compute_file_blake2b_hash def get_bids_json_scanner_info(bids_json: dict[str, Any]) -> MriScannerInfo: @@ -36,3 +39,18 @@ def get_bids_json_session_info(env: Env, bids_json: dict[str, Any]) -> SessionIn scanner_info = get_bids_json_scanner_info(bids_json) return get_session_info(env, patient_id, scanner_info) + + +def add_bids_json_file_parameters(env: Env, bids_json_path: str, rel_json_path: str, file_parameters: dict[str, Any]): + """ + Read a BIDS JSON sidecar file and add its parameters to a LORIS file parameters dictionary. 
+ """ + + with open(bids_json_path) as data_file: + file_parameters.update(json.load(data_file)) + map_bids_param_to_loris_param(env, file_parameters) + + json_blake2 = compute_file_blake2b_hash(bids_json_path) + + file_parameters['bids_json_file'] = rel_json_path + file_parameters['bids_json_file_blake2b_hash'] = json_blake2 diff --git a/python/lib/imaging_lib/bids/tsv_participants.py b/python/lib/imaging_lib/bids/tsv_participants.py new file mode 100644 index 000000000..f0deafe81 --- /dev/null +++ b/python/lib/imaging_lib/bids/tsv_participants.py @@ -0,0 +1,123 @@ +import csv +import re +from dataclasses import dataclass + +from dateutil.parser import ParserError, parse + + +@dataclass +class BidsTsvParticipant: + """ + Information about a participant found in a row of the `participants.tsv` file of a BIDS + dataset. + """ + + id: str + birth_date: str | None = None + sex: str | None = None + age: str | None = None + site: str | None = None + cohort: str | None = None + project: str | None = None + + +def read_bids_participants_tsv_file(participants_tsv_path: str) -> dict[str, BidsTsvParticipant]: + """ + Read the `participants.tsv` file of a BIDS dataset and get the participant rows indexed by + participant ID. Raise an exception if the `participants.tsv` file is incorrect. 
+ """ + + tsv_participants: dict[str, BidsTsvParticipant] = {} + with open(participants_tsv_path) as participants_tsv_file: + reader = csv.DictReader(participants_tsv_file.readlines(), delimiter='\t') + if reader.fieldnames is None or 'participant_id' not in reader.fieldnames: + raise Exception(f"Missing 'participant_id' field in participants.tsv file '{participants_tsv_path}'.") + + for tsv_participant_row in reader: + tsv_participant = read_bids_participants_tsv_row(tsv_participant_row, participants_tsv_path) + tsv_participants[tsv_participant.id] = tsv_participant + + return tsv_participants + + +def read_bids_participants_tsv_row( + tsv_participant_row: dict[str, str], + participants_tsv_path: str, +) -> BidsTsvParticipant: + """ + Read a `participants.tsv` row, or raise an exception if that row is incorrect. + """ + + # Get the participant ID and removing the `sub-` prefix if it is present. + full_participant_id = tsv_participant_row.get('participant_id') + if full_participant_id is None: + raise Exception(f"Missing 'participant_id' value in participants.tsv file '{participants_tsv_path}'.") + + participant_id = re.sub(r'^sub-', '', full_participant_id) + + birth_date = _read_birth_date(tsv_participant_row) + cohort = _read_cohort(tsv_participant_row) + + # Create the BIDS participant object. + return BidsTsvParticipant( + id = participant_id, + birth_date = birth_date, + sex = tsv_participant_row.get('sex'), + age = tsv_participant_row.get('age'), + site = tsv_participant_row.get('site'), + project = tsv_participant_row.get('project'), + cohort = cohort, + ) + + +def write_bids_participants_tsv_file(tsv_participants: dict[str, BidsTsvParticipant], participants_file_path: str): + """ + Write the `participants.tsv` file based from a set of participant rows. 
+ """ + + with open(participants_file_path, 'w') as participants_file: + writer = csv.writer(participants_file, delimiter='\t') + writer.writerow(['participant_id']) + + for tsv_participant in sorted(tsv_participants.values(), key=lambda tsv_participant: tsv_participant.id): + writer.writerow([tsv_participant.id]) + + +def merge_bids_tsv_participants( + tsv_participants: dict[str, BidsTsvParticipant], + new_tsv_participants: dict[str, BidsTsvParticipant], +): + """ + Copy a set of participants.tsv rows into another one. The rows of the first set are replaced by + those of these second if there are duplicates. + """ + + for new_tsv_participant in new_tsv_participants.values(): + tsv_participants[new_tsv_participant.id] = new_tsv_participant + + +def _read_birth_date(tsv_participant_row: dict[str, str]) -> str | None: + """ + Read the date of birth field of a participant from a `participants.tsv` row. + """ + + for birth_date_field_ame in ['date_of_birth', 'birth_date', 'dob']: + if birth_date_field_ame in tsv_participant_row: + try: + return parse(tsv_participant_row[birth_date_field_ame]).strftime('%Y-%m-%d') + except ParserError: + pass + + return None + + +def _read_cohort(tsv_participant_row: dict[str, str]) -> str | None: + """ + Read the cohort field of a participant from a `participants.tsv` row. 
+ """ + + for cohort_field_name in ['cohort', 'subproject']: + if cohort_field_name in tsv_participant_row: + return tsv_participant_row[cohort_field_name] + + return None diff --git a/python/lib/imaging_lib/bids/tsv_scans.py b/python/lib/imaging_lib/bids/tsv_scans.py new file mode 100644 index 000000000..82c319de8 --- /dev/null +++ b/python/lib/imaging_lib/bids/tsv_scans.py @@ -0,0 +1,126 @@ +import csv +from dataclasses import dataclass +from datetime import datetime +from typing import Any + +from dateutil.parser import ParserError, parse + +from lib.util.crypto import compute_file_blake2b_hash + + +@dataclass +class BidsTsvScan: + """ + Information about a scan found in a row of a `scans.tsv` file of a BIDS dataset. + """ + + file_name : str + acquisition_time : datetime | None + age_at_scan : str | None + + +def read_bids_scans_tsv_file(scans_tsv_path: str) -> dict[str, BidsTsvScan]: + """ + Read a `scans.tsv` file of a BIDS dataset and get the scan rows indexed by file name. Raise an + exception if the `scans.tsv` file is incorrect. + """ + + tsv_scans: dict[str, BidsTsvScan] = {} + with open(scans_tsv_path) as scans_tsv_file: + reader = csv.DictReader(scans_tsv_file.readlines(), delimiter='\t') + if reader.fieldnames is None or 'filename' not in reader.fieldnames: + raise Exception(f"Missing 'filename' field in scans.tsv file '{scans_tsv_path}'.") + + for tsv_scan_row in reader: + tsv_row = read_bids_scans_tsv_row(tsv_scan_row, scans_tsv_path) + tsv_scans[tsv_row.file_name] = tsv_row + + return tsv_scans + + +def read_bids_scans_tsv_row(tsv_scan_row: dict[str, str], scans_tsv_path: str) -> BidsTsvScan: + """ + Read a `scans.tsv` row, or raise an exception if that row is incorrect. 
+ """ + + file_name = tsv_scan_row.get('filename') + if file_name is None: + raise Exception(f"Missing 'filename' value in scans.tsv file '{scans_tsv_path}'.") + + acquisition_time = _read_acquisition_time(tsv_scan_row) + age_at_scan = _read_age_at_scan(tsv_scan_row) + + return BidsTsvScan( + file_name = file_name, + acquisition_time = acquisition_time, + age_at_scan = age_at_scan, + ) + + +def write_bids_scans_tsv_file(tsv_scans: dict[str, BidsTsvScan], scans_tsv_path: str): + """ + Write the `scans.tsv` file from a set of scan rows. + """ + + with open(scans_tsv_path, 'w') as scans_tsv_file: + writer = csv.writer(scans_tsv_file, delimiter='\t') + writer.writerow(['filename', 'acq_time', 'age_at_scan']) + + for tsv_scan in sorted(tsv_scans.values(), key=lambda tsv_scan: tsv_scan.file_name): + writer.writerow([ + tsv_scan.file_name, + tsv_scan.acquisition_time, + tsv_scan.age_at_scan + ]) + + +def merge_bids_tsv_scans(tsv_scans: dict[str, BidsTsvScan], new_tsv_scans: dict[str, BidsTsvScan]): + """ + Copy a set of scans.tsv rows into another one. The rows of the first set are replaced by those + of these second if there are duplicates. + """ + + for new_tsv_scan in new_tsv_scans.values(): + tsv_scans[new_tsv_scan.file_name] = new_tsv_scan + + +def _read_acquisition_time(tsv_scan_row: dict[str, str]) -> datetime | None: + """ + Read the acquisition time field of a scan from a `scans.tsv` row. + """ + + for field_name in ['acq_time', 'mri_acq_time', 'eeg_acq_time']: + acquisition_time = tsv_scan_row.get(field_name) + if acquisition_time is None or acquisition_time == 'n/a': + continue + + try: + return parse(acquisition_time) + except ParserError: + pass + + return None + + +def _read_age_at_scan(tsv_scan_row: dict[str, str]) -> str | None: + """ + Read the age at scan field of a scan from a `scans.tsv` row. 
+ """ + + for field_name in ['age', 'age_at_scan', 'age_acq_time']: + age_at_scan = tsv_scan_row.get(field_name) + if age_at_scan is not None: + return age_at_scan.strip() + + return None + + +def add_scan_tsv_file_parameters(scan_tsv: BidsTsvScan, scans_tsv_path: str, file_parameters: dict[str, Any]): + """ + Add a scans.tsv file and row parameters to a LORIS file parameters dictionary. + """ + + file_parameters['scan_acquisition_time'] = scan_tsv.acquisition_time + file_parameters['age_at_scan'] = scan_tsv.age_at_scan + file_parameters['scans_tsv_file'] = scans_tsv_path + file_parameters['scans_tsv_file_bake2hash'] = compute_file_blake2b_hash(scans_tsv_path) diff --git a/python/lib/imaging_lib/bids/util.py b/python/lib/imaging_lib/bids/util.py new file mode 100644 index 000000000..89b9d5658 --- /dev/null +++ b/python/lib/imaging_lib/bids/util.py @@ -0,0 +1,20 @@ +import re + +from lib.db.queries.imaging_file_type import get_all_imaging_file_types +from lib.env import Env + + +def determine_bids_file_type(env: Env, file_name: str) -> str | None: + """ + Determine the file type of a BIDS file from the database using its name, or return `None` if no + corresponding file type is found. 
+ """ + + imaging_file_types = get_all_imaging_file_types(env.db) + + for imaging_file_type in imaging_file_types: + regex = re.escape(imaging_file_type.type) + r'(\.gz)?$' + if re.search(regex, file_name): + return imaging_file_type.type + + return None diff --git a/python/lib/imaging_lib/file.py b/python/lib/imaging_lib/file.py new file mode 100644 index 000000000..64eb4f9c2 --- /dev/null +++ b/python/lib/imaging_lib/file.py @@ -0,0 +1,45 @@ +import getpass +from datetime import datetime + +from lib.db.models.file import DbFile +from lib.db.models.mri_scan_type import DbMriScanType +from lib.db.models.session import DbSession +from lib.env import Env + + +def register_imaging_file( + env: Env, + file_type: str, + file_rel_path: str, + session: DbSession, + mri_scan_type: DbMriScanType | None, + echo_time: float | None, + echo_number: str | None, + phase_encoding_direction: str | None, +) -> DbFile: + """ + Register an imaging file in the database. + """ + + user = getpass.getuser() + time = datetime.now() + + file = DbFile( + file_type = file_type, + rel_path = file_rel_path, + session_id = session.id, + inserted_by_user_id = user, + insert_time = time, + coordinate_space = 'native', + output_type = 'native', + echo_time = echo_time, + echo_number = echo_number, + phase_encoding_direction = phase_encoding_direction, + source_file_id = None, + scan_type_id = mri_scan_type.id if mri_scan_type is not None else None, + ) + + env.db.add(file) + env.db.commit() + + return file diff --git a/python/lib/imaging_lib/file_parameter.py b/python/lib/imaging_lib/file_parameter.py new file mode 100644 index 000000000..c1e1cb941 --- /dev/null +++ b/python/lib/imaging_lib/file_parameter.py @@ -0,0 +1,81 @@ +from datetime import datetime +from typing import Any + +from lib.db.models.file import DbFile +from lib.db.models.file_parameter import DbFileParameter +from lib.db.models.parameter_type import DbParameterType +from lib.db.models.parameter_type_category_rel import 
DbParameterTypeCategoryRel +from lib.db.queries.file_parameter import try_get_file_parameter_with_file_id_type_id +from lib.db.queries.parameter_type import get_parameter_type_category_with_name, try_get_parameter_type_with_name +from lib.env import Env + + +def register_file_parameters(env: Env, file: DbFile, parameter_infos: dict[str, Any]): + """ + Insert or update some file parameters with the provided parameter names and values. + """ + + for parameter_name, parameter_value in parameter_infos.items(): + register_file_parameter(env, file, parameter_name, parameter_value) + + +def register_file_parameter(env: Env, file: DbFile, parameter_name: str, parameter_value: Any): + """ + Insert or update a file parameter with the provided parameter name and value. + """ + + if isinstance(parameter_value, list): + parameter_values = map(lambda parameter_value: str(parameter_value), parameter_value) # type: ignore + parameter_value = f"[{', '.join(parameter_values)}]" + + parameter_type = get_or_create_parameter_type(env, parameter_name) + + parameter = try_get_file_parameter_with_file_id_type_id(env.db, file.id, parameter_type.id) + if parameter is None: + time = datetime.now() + + parameter = DbFileParameter( + type_id = parameter_type.id, + file_id = file.id, + value = parameter_value, + insert_time = time, + ) + + env.db.add(parameter) + else: + parameter.value = parameter_value + + env.db.commit() + + +def get_or_create_parameter_type(env: Env, parameter_name: str) -> DbParameterType: + """ + Get a parameter type using its name, or create that parameter type if it does not exist.
+ """ + + parameter_type = try_get_parameter_type_with_name(env.db, parameter_name) + if parameter_type is not None: + return parameter_type + + parameter_type = DbParameterType( + name = parameter_name, + alias = None, + data_type = 'text', + description = f'{parameter_name} created by the lib.imaging.file_parameter Python module', + source_from = 'parameter_file', + queryable = False, + ) + + env.db.add(parameter_type) + env.db.commit() + + parameter_type_category = get_parameter_type_category_with_name(env.db, 'MRI Variables') + parameter_type_category_rel = DbParameterTypeCategoryRel( + parameter_type_id = parameter_type.id, + parameter_type_category_id = parameter_type_category.id, + ) + + env.db.add(parameter_type_category_rel) + env.db.commit() + + return parameter_type diff --git a/python/lib/imaging_lib/mri_scan_type.py b/python/lib/imaging_lib/mri_scan_type.py new file mode 100644 index 000000000..df648affd --- /dev/null +++ b/python/lib/imaging_lib/mri_scan_type.py @@ -0,0 +1,17 @@ +from lib.db.models.mri_scan_type import DbMriScanType +from lib.env import Env + + +def create_mri_scan_type(env: Env, name: str) -> DbMriScanType: + """ + Create an MRI scan type in the database. + """ + + mri_scan_type = DbMriScanType( + name = name, + ) + + env.db.add(mri_scan_type) + env.db.commit() + + return mri_scan_type diff --git a/python/lib/imaging_lib/nifti.py b/python/lib/imaging_lib/nifti.py index 7d8c5697f..3a93f1109 100644 --- a/python/lib/imaging_lib/nifti.py +++ b/python/lib/imaging_lib/nifti.py @@ -5,9 +5,9 @@ import nibabel as nib -def add_nifti_spatial_file_parameters(nifti_path: str, file_parameters: dict[str, Any]): +def add_nifti_file_parameters(nifti_path: str, nifti_file_hash: str, file_parameters: dict[str, Any]): """ - Read a NIfTI image and add its spatial metadata to the file parameters. + Read a NIfTI image and add some of its properties to the file parameters. 
""" img = nib.load(nifti_path) # type: ignore @@ -30,6 +30,9 @@ def add_nifti_spatial_file_parameters(nifti_path: str, file_parameters: dict[str else: file_parameters['time'] = None + # Add the file BLAKE2b hash. + file_parameters['file_blake2b_hash'] = nifti_file_hash + def find_dir_nifti_names(dir_path: str) -> Iterator[str]: """ diff --git a/python/lib/imaging_lib/nifti_pic.py b/python/lib/imaging_lib/nifti_pic.py new file mode 100644 index 000000000..1e702b839 --- /dev/null +++ b/python/lib/imaging_lib/nifti_pic.py @@ -0,0 +1,67 @@ +import os +import re + +import nibabel as nib +import numpy as np +from nibabel.nifti1 import Nifti1Image +from nilearn import plotting + +from lib.config import get_data_dir_path_config +from lib.db.models.file import DbFile +from lib.env import Env + + +def create_imaging_pic(env: Env, file: DbFile, is_4d_data: bool) -> str: + """ + Creates the preview pic that will show in the imaging browser view session + page. This pic will be stored in the data_dir/pic folder + + :param file_info: dictionary with file information (path, file_id, cand_id...) + :type file_info: dict + :param pic_rel_path: relative path to the pic to use if one provided. 
Otherwise + create_imaging_pic will automatically generate the pic name + based on the file path of the NIfTI file + :type pic_rel_path: str + + :return: path to the created pic + :rtype: str + """ + + data_dir_path = get_data_dir_path_config(env) + + cand_id = file.session.candidate.cand_id + file_path = os.path.join(data_dir_path, file.rel_path) + + pic_name = re.sub(r"\.nii(\.gz)?$", f'_{file.id}_check.png', os.path.basename(file.rel_path)) + pic_rel_path = os.path.join(str(cand_id), pic_name) + pic_dir_path = os.path.join(data_dir_path, 'pic', str(cand_id)) + pic_path = os.path.join(data_dir_path, 'pic', pic_rel_path) + + # create the candID directory where the pic will go if it does not already exist + if not os.path.exists(pic_dir_path): + os.mkdir(pic_dir_path) + + img = nib.load(file_path) # type: ignore + + if is_4d_data: + # Only load the first slice of a 4D image. + data = img.dataobj[..., 0] # type: ignore + else: + data = img.dataobj[...] # type: ignore + + # Load the image as float32 for plotting. 
+ volume = Nifti1Image( + data.astype(np.float32, copy=False), # type: ignore + img.affine, # type: ignore + ) + + plotting.plot_anat( # type: ignore + anat_img=volume, + output_file=pic_path, + display_mode='ortho', + black_bg=True, # type: ignore + draw_cross=False, + annotate=False, + ) + + return pic_rel_path diff --git a/python/lib/import_bids_dataset/args.py b/python/lib/import_bids_dataset/args.py new file mode 100644 index 000000000..393c8f1f9 --- /dev/null +++ b/python/lib/import_bids_dataset/args.py @@ -0,0 +1,13 @@ +from dataclasses import dataclass +from typing import Literal + + +@dataclass +class Args: + source_bids_path: str + type: Literal[None, 'raw', 'derivative'] + bids_validation: bool + create_candidate: bool + create_session: bool + copy: bool + verbose: bool diff --git a/python/lib/import_bids_dataset/check_subjects_sessions.py b/python/lib/import_bids_dataset/check_subjects_sessions.py new file mode 100644 index 000000000..fb2e477ac --- /dev/null +++ b/python/lib/import_bids_dataset/check_subjects_sessions.py @@ -0,0 +1,413 @@ +import random +from datetime import datetime + +from dateutil.parser import ParserError, parse +from sqlalchemy.orm import Session as Database + +from lib.config import get_default_bids_visit_label_config +from lib.db.models.candidate import DbCandidate +from lib.db.models.cohort import DbCohort +from lib.db.models.project import DbProject +from lib.db.models.session import DbSession +from lib.db.models.site import DbSite +from lib.db.queries.candidate import try_get_candidate_with_cand_id, try_get_candidate_with_psc_id +from lib.db.queries.cohort import try_get_cohort_with_name +from lib.db.queries.project import try_get_project_with_alias, try_get_project_with_name +from lib.db.queries.session import try_get_session_with_cand_id_visit_label +from lib.db.queries.sex import try_get_sex_with_name +from lib.db.queries.site import try_get_site_with_alias, try_get_site_with_name +from lib.db.queries.visit import 
try_get_visit_with_visit_label +from lib.env import Env +from lib.imaging_lib.bids.dataset import BidsDataset, BidsSubject +from lib.imaging_lib.bids.tsv_participants import BidsTsvParticipant +from lib.logging import log, log_error, log_error_exit + + +class CheckBidsSubjectSessionError(Exception): + """ + Exception raised if the check or creation of a candidate or session from a BIDS dataset fails. + """ + + def __init__(self, message: str): + super().__init__(message) + + +def check_bids_session_labels( + env: Env, + bids: BidsDataset, +): + """ + Check that all the session labels in a BIDS dataset correspond to a LORIS visit, or exit the + program with an error if that is not the case. + """ + + unknown_session_labels: list[str] = [] + + for session_label in bids.session_labels: + visit = try_get_visit_with_visit_label(env.db, session_label) + if visit is None: + unknown_session_labels.append(session_label) + + if unknown_session_labels != []: + log_error_exit( + env, + ( + f"Found {len(unknown_session_labels)} unknown session labels in the BIDS dataset. Unknown session" + f" labels are: {', '.join(unknown_session_labels)}. Each BIDS session label should correspond to a" + " LORIS visit label." + ) + ) + + +def check_or_create_bids_subjects_and_sessions( + env: Env, + bids: BidsDataset, + create_candidate: bool, + create_session: bool, +) -> int: + """ + Check that the subjects and sessions of a BIDS dataset correspond to LORIS candidates and + sessions, or create them using information extracted from the BIDS dataset if the relevant + arguments are passed. + + Exit the program with an error if the check or creation of any candidate or session fails. + Return the project ID of the last candidate processed. + """ + + try: + # Read the participants.tsv property to raise an exception if the file is incorrect. + bids.tsv_participants + except Exception as exception: + log_error_exit(env, f"Error while reading the participants.tsv file. 
Full error:\n{exception}") + + candidate = None + errors: list[Exception] = [] + + for subject in bids.subjects: + try: + candidate = check_or_create_bids_subject_and_sessions(env, subject, create_candidate, create_session) + except Exception as error: + log_error(env, str(error)) + errors.append(error) + + if errors != []: + error_message = f"Found {len(errors)} errors while checking BIDS subjects and sessions." + if create_candidate or create_session: + error_message += " No candidate or session has been created." + + log_error_exit(env, error_message) + + if candidate is None: + log_error_exit(env, "No subject found in the BIDS dataset.") + + # Only commit the new candidates and sessions if no error has occurred. + env.db.commit() + + # Return the project ID of a candidate of the BIDS dataset. For this value to be used, it + # should be assumed that all the candidates of the BIDS dataset are in the same project. + return candidate.registration_project_id + + +def check_or_create_bids_subject_and_sessions( + env: Env, + subject: BidsSubject, + create_candidate: bool, + create_session: bool, +) -> DbCandidate: + """ + Check that a BIDS subject and its sessions correspond to a LORIS candidate and its sessions, or + create them using information extracted from the BIDS dataset if the relevant arguments are + passed. + + Raise an error if the check or creation of the candidate or any of its sessions fails. Return + the candidate corresponding to the BIDS subject. + """ + + tsv_participant = subject.root_dataset.get_tsv_participant(subject.label) + if tsv_participant is None: + raise CheckBidsSubjectSessionError( + f"No participants.tsv entry found for subject label '{subject.label}' in the BIDS dataset. The BIDS" + " directory subjects do not match the participants.tsv file."
+ ) + + candidate = check_or_create_bids_subject(env, tsv_participant, create_candidate) + + if create_session: + cohort = get_tsv_participant_cohort(env, tsv_participant) + else: + cohort = None + + for session in subject.sessions: + if session.label is not None: + visit_label = session.label + else: + visit_label = get_default_bids_visit_label_config(env) + + check_or_create_bids_session(env, candidate, cohort, visit_label, create_session) + + return candidate + + +def check_or_create_bids_subject(env: Env, tsv_participant: BidsTsvParticipant, create_candidate: bool) -> DbCandidate: + """ + Check that the subject of a BIDS participants.tsv row exists in LORIS, or create them using the + information of that row if the relevant argument is passed. Raise an exception if the candidate + does not exist or cannot be created. + """ + + try: + cand_id = int(tsv_participant.id) + candidate = try_get_candidate_with_cand_id(env.db, cand_id) + if candidate is None: + raise CheckBidsSubjectSessionError( + f"No LORIS candidate found for the BIDS participant ID '{tsv_participant.id}' (identified as a CandID)." + ) + + return candidate + except ValueError: + pass + + candidate = try_get_candidate_with_psc_id(env.db, tsv_participant.id) + if candidate is not None: + return candidate + + if not create_candidate: + raise CheckBidsSubjectSessionError( + f"No LORIS candidate found for the BIDS participant ID '{tsv_participant.id}' (identified as a PSCID)." + ) + + return create_bids_candidate(env, tsv_participant) + + +def create_bids_candidate(env: Env, tsv_participant: BidsTsvParticipant) -> DbCandidate: + """ + Create a candidate using the information of a BIDS participants.tsv row, or raise an exception + if that candidate cannot be created.
+ """ + + log(env, f"Creating LORIS candidate for BIDS subject '{tsv_participant.id}'...") + + psc_id = tsv_participant.id + + cand_id = generate_new_cand_id(env.db) + + birth_date = get_tsv_participant_birth_date(tsv_participant) + + sex = get_tsv_participant_sex(env, tsv_participant) + + site = get_tsv_participant_site(env, tsv_participant) + + project = get_tsv_participant_project(env, tsv_participant) + + log( + env, + ( + "Creating candidate with information:\n" + f" PSCID = {psc_id}\n" + f" CandID = {cand_id}\n" + f" Site = {site.name}\n" + f" Project = {project.name}" + ) + ) + + candidate = DbCandidate( + cand_id = cand_id, + psc_id = psc_id, + date_of_birth = birth_date, + sex = sex, + registration_site_id = site.id, + registration_project_id = project.id, + ) + + env.db.add(candidate) + env.db.flush() + + return candidate + + +def check_or_create_bids_session( + env: Env, + candidate: DbCandidate, + cohort: DbCohort | None, + visit_label: str, + create_session: bool, +) -> DbSession: + """ + Check that a BIDS session exists in LORIS, or create it using information previously obtained + from the BIDS dataset if the relevant argument is passed. Raise an exception if the session + does not exist or cannot be created. + """ + + session = try_get_session_with_cand_id_visit_label(env.db, candidate.cand_id, visit_label) + if session is not None: + return session + + if not create_session: + log_error_exit( + env, + f"No session found for candidate '{candidate.psc_id}' and visit label '{visit_label}'." + ) + + return create_bids_session(env, candidate, cohort, visit_label) + + +def create_bids_session(env: Env, candidate: DbCandidate, cohort: DbCohort | None, visit_label: str) -> DbSession: + """ + Create a session using information previously obtained from the BIDS dataset, or raise an + exception if the session does not exist or cannot be created. 
+ """ + + if cohort is None: + log_error_exit(env, f"No cohort found for candidate '{candidate.psc_id}', cannot create session.") + + log( + env, + ( + "Creating session with:\n" + f" PSCID = {candidate.cand_id}\n" + f" Visit label = {visit_label}" + ) + ) + + session = DbSession( + candidate_id = candidate.id, + visit_label = visit_label, + current_stage = 'Not Started', + site_id = candidate.registration_site_id, + project_id = candidate.registration_project_id, + cohort_id = cohort.id, + ) + + env.db.add(session) + env.db.flush() + + return session + + +def get_tsv_participant_birth_date(tsv_participant: BidsTsvParticipant) -> datetime | None: + """ + Get the birth date of a BIDS participants.tsv row, or return `None` if no birth date is + specified. Raise an exception if a birth date is specified but cannot be parsed. + """ + + if tsv_participant.birth_date is None: + return None + + try: + return parse(tsv_participant.birth_date) + except ParserError: + raise CheckBidsSubjectSessionError( + f"Could not parse the BIDS participants.tsv birth date '{tsv_participant.birth_date}'." + ) + + +def get_tsv_participant_sex(env: Env, tsv_participant: BidsTsvParticipant) -> str | None: + """ + Get the sex of a BIDS participants.tsv row, or return `None` if no sex is specified. Raise an + exception if a sex is specified but does not exist in LORIS. + """ + + if tsv_participant.sex is None: + return None + + tsv_participant_sex = tsv_participant.sex.lower() + + if tsv_participant_sex in ['m', 'male']: + sex_name = 'Male' + elif tsv_participant_sex in ['f', 'female']: + sex_name = 'Female' + elif tsv_participant_sex in ['o', 'other']: + sex_name = 'Other' + else: + sex_name = tsv_participant.sex + + sex = try_get_sex_with_name(env.db, sex_name) + if sex is None: + raise CheckBidsSubjectSessionError( + f"No LORIS sex found for the BIDS participants.tsv sex name or alias '{tsv_participant.sex}'." 
+ ) + + return sex.name + + +def get_tsv_participant_site(env: Env, tsv_participant: BidsTsvParticipant) -> DbSite: + """ + Get the site of a BIDS participants.tsv row, or raise an exception if no site is specified or + the site does not exist in LORIS. + """ + + if tsv_participant.site is None: + raise CheckBidsSubjectSessionError( + "No 'site' column found in the BIDS participants.tsv file, this field is required to create candidates or" + " sessions. " + ) + + site = try_get_site_with_name(env.db, tsv_participant.site) + if site is not None: + return site + + site = try_get_site_with_alias(env.db, tsv_participant.site) + if site is not None: + return site + + raise CheckBidsSubjectSessionError( + f"No site found for the BIDS participants.tsv site name or alias '{tsv_participant.site}'." + ) + + +def get_tsv_participant_project(env: Env, tsv_participant: BidsTsvParticipant) -> DbProject: + """ + Get the project of a BIDS participants.tsv row, or raise an exception if no project is + specified or the project does not exist in LORIS. + """ + + if tsv_participant.project is None: + raise CheckBidsSubjectSessionError( + "No 'project' column found in the BIDS participants.tsv file, this field is required to create candidates" + " or sessions. " + ) + + project = try_get_project_with_name(env.db, tsv_participant.project) + if project is not None: + return project + + project = try_get_project_with_alias(env.db, tsv_participant.project) + if project is not None: + return project + + raise CheckBidsSubjectSessionError( + f"No project found for the BIDS participants.tsv project name or alias '{tsv_participant.project}'." + ) + + +def get_tsv_participant_cohort(env: Env, tsv_participant: BidsTsvParticipant) -> DbCohort: + """ + Get the cohort of a BIDS participants.tsv row, or raise an exception if no cohort is specified + or the cohort does not exist in LORIS. 
+ """ + + if tsv_participant.cohort is None: + raise CheckBidsSubjectSessionError( + "No 'cohort' column found in the BIDS participants.tsv file, this field is required to create session." + ) + + cohort = try_get_cohort_with_name(env.db, tsv_participant.cohort) + if cohort is None: + raise CheckBidsSubjectSessionError( + f"No cohort found for the BIDS participants.tsv cohort name '{tsv_participant.cohort}'." + ) + + return cohort + + +# TODO: Move this function to a more appropriate place. +def generate_new_cand_id(db: Database) -> int: + """ + Generate a new random CandID that is not already in the database. + """ + + while True: + cand_id = random.randint(100000, 999999) + candidate = try_get_candidate_with_cand_id(db, cand_id) + if candidate is None: + return cand_id diff --git a/python/lib/import_bids_dataset/env.py b/python/lib/import_bids_dataset/env.py new file mode 100644 index 000000000..8d376d4d5 --- /dev/null +++ b/python/lib/import_bids_dataset/env.py @@ -0,0 +1,29 @@ +from dataclasses import dataclass + + +@dataclass +class BidsImportEnv: + """ + Pipeline-specific variables of the BIDS dataset import pipeline. 
+ """ + + data_dir_path : str + loris_bids_path : str | None + total_files_count : int + imported_files_count : int + ignored_files_count : int + failed_files_count : int + unknown_scan_types : list[str] + + def __init__(self, data_dir_path: str, loris_bids_path: str | None, total_files_count: int): + self.data_dir_path = data_dir_path + self.loris_bids_path = loris_bids_path + self.total_files_count = total_files_count + self.imported_files_count = 0 + self.ignored_files_count = 0 + self.failed_files_count = 0 + self.unknown_scan_types = [] + + @property + def processed_files_count(self) -> int: + return self.imported_files_count + self.ignored_files_count + self.failed_files_count diff --git a/python/lib/import_bids_dataset/events.py b/python/lib/import_bids_dataset/events.py new file mode 100644 index 000000000..29882a2fb --- /dev/null +++ b/python/lib/import_bids_dataset/events.py @@ -0,0 +1,69 @@ +import json +import os +from typing import Any + +import lib.utilities +from lib.database import Database +from lib.env import Env +from lib.imaging_lib.bids.dataset import BidsDataset +from lib.import_bids_dataset.args import Args +from lib.logging import log_warning +from lib.physiological import Physiological +from lib.util.crypto import compute_file_blake2b_hash + + +def get_events_metadata( + env: Env, + args: Args, + bids: BidsDataset, + legacy_db: Database, + loris_bids_path: str | None, + project_id: int, +) -> dict[Any, Any]: + """ + Get the root level 'events.json' data, assuming a singe project for the BIDS dataset. + """ + + root_event_metadata_file = bids.layout.get_nearest( # type: ignore + bids.path, + return_type='tuple', + strict=False, + extension='json', + suffix='events', + all_=False, + subject=None, + session=None, + ) + + if not root_event_metadata_file: + log_warning(env, "No event metadata files (events.json) in the BIDS root directory.") + return {} + + # Copy the event file to the LORIS BIDS import directory. 
+ + copy_file = str.replace(root_event_metadata_file.path, bids.layout.root, '') # type: ignore + + if loris_bids_path is not None: + event_metadata_path = os.path.join(loris_bids_path, copy_file) + lib.utilities.copy_file(root_event_metadata_file.path, event_metadata_path, args.verbose) # type: ignore + + hed_query = 'SELECT * FROM hed_schema_nodes WHERE 1' + hed_union = legacy_db.pselect(query=hed_query, args=()) # type: ignore + + # load json data + with open(root_event_metadata_file.path) as metadata_file: # type: ignore + event_metadata = json.load(metadata_file) + + blake2 = compute_file_blake2b_hash(root_event_metadata_file.path) # type: ignore + physio = Physiological(legacy_db, args.verbose) + _, dataset_tag_dict = physio.insert_event_metadata( # type: ignore + event_metadata=event_metadata, + event_metadata_file=event_metadata_path, # type: ignore + physiological_file_id=None, + project_id=project_id, + blake2=blake2, + project_wide=True, + hed_union=hed_union # type: ignore + ) + + return dataset_tag_dict # type: ignore diff --git a/python/lib/import_bids_dataset/imaging.py b/python/lib/import_bids_dataset/imaging.py new file mode 100644 index 000000000..9390aa18b --- /dev/null +++ b/python/lib/import_bids_dataset/imaging.py @@ -0,0 +1,48 @@ +from typing import Any + +from lib.db.queries.parameter_type import get_all_parameter_types +from lib.env import Env + + +def map_bids_param_to_loris_param(env: Env, file_parameters: dict[str, Any]): + """ + Maps the BIDS parameters found in the BIDS JSON file with the + parameter type names of LORIS. + + :param file_parameters: dictionary with the list of parameters + found in the BIDS JSON file + :type file_parameters: dict + + :return: returns a dictionary with the BIDS JSON parameter names + as well as their LORIS equivalent + :rtype: dict + """ + + parameter_types_mapping = get_bids_to_minc_parameter_types_mapping(env) + + # Map BIDS parameters with the LORIS ones. 
+ for file_parameter in list(file_parameters.keys()): + file_parameter_type = parameter_types_mapping.get(file_parameter) + if file_parameter_type is not None: + file_parameters[file_parameter_type] = file_parameters[file_parameter] + + +def get_bids_to_minc_parameter_types_mapping(env: Env) -> dict[str, str]: + """ + Queries the BIDS to MINC mapping dictionary stored in the parameter_type table and returns a + dictionary with the BIDS terms as keys and the MINC terms as values. + + :return: BIDS to MINC mapping dictionary + :rtype: dict + """ + + parameter_types = get_all_parameter_types(env.db) + + parameter_types_mapping: dict[str, str] = {} + for parameter_type in parameter_types: + if parameter_type.alias is None: + continue + + parameter_types_mapping[parameter_type.alias] = parameter_type.name + + return parameter_types_mapping diff --git a/python/lib/import_bids_dataset/main.py b/python/lib/import_bids_dataset/main.py new file mode 100644 index 000000000..2c1a601ff --- /dev/null +++ b/python/lib/import_bids_dataset/main.py @@ -0,0 +1,315 @@ +import os +import re +import shutil +from typing import Any + +from lib.config import get_data_dir_path_config, get_default_bids_visit_label_config +from lib.database import Database +from lib.db.models.session import DbSession +from lib.db.queries.candidate import try_get_candidate_with_psc_id +from lib.db.queries.session import try_get_session_with_cand_id_visit_label +from lib.eeg import Eeg +from lib.env import Env +from lib.imaging_lib.bids.dataset import BidsDataset, BidsDataType, BidsSession +from lib.imaging_lib.bids.dataset_description import BidsDatasetDescriptionError +from lib.imaging_lib.bids.tsv_participants import ( + BidsTsvParticipant, + merge_bids_tsv_participants, + read_bids_participants_tsv_file, + write_bids_participants_tsv_file, +) +from lib.imaging_lib.bids.tsv_scans import ( + BidsTsvScan, + merge_bids_tsv_scans, + read_bids_scans_tsv_file, + write_bids_scans_tsv_file, +) +from
lib.import_bids_dataset.args import Args +from lib.import_bids_dataset.check_subjects_sessions import ( + check_bids_session_labels, + check_or_create_bids_subjects_and_sessions, +) +from lib.import_bids_dataset.env import BidsImportEnv +from lib.import_bids_dataset.events import get_events_metadata +from lib.import_bids_dataset.mri import import_bids_nifti +from lib.import_bids_dataset.print import print_bids_import_summary +from lib.logging import log, log_error, log_error_exit, log_warning +from lib.util.iter import count + +BIDS_EEG_DATA_TYPES = ['eeg', 'ieeg'] + +BIDS_MRI_DATA_TYPES = ['anat', 'dwi', 'fmap', 'func'] + + +def import_bids_dataset(env: Env, args: Args, legacy_db: Database): + """ + Read the provided BIDS dataset and import it into LORIS. + """ + + data_dir_path = get_data_dir_path_config(env) + + log(env, "Parsing BIDS dataset...") + + bids = BidsDataset(args.source_bids_path, args.bids_validation) + + niftis_count = count(bids.niftis) + + log(env, f"Found {niftis_count} NIfTI files.") + + log(env, f"Found {len(bids.subject_labels)} subjects:") + for subject_label in bids.subject_labels: + log(env, f"- {subject_label}") + + log(env, f"Found {len(bids.session_labels)} sessions:") + for session_label in bids.session_labels: + log(env, f"- {session_label}") + + # Check the BIDS subject and session labels and create their candidates and sessions in LORIS + # if needed. + + check_bids_session_labels(env, bids) + + project_id = check_or_create_bids_subjects_and_sessions(env, bids, args.create_candidate, args.create_session) + + # Get the LORIS BIDS import directory path and create the directory if needed. + + if args.copy: + loris_bids_path = get_loris_bids_path(env, bids, data_dir_path) + else: + loris_bids_path = None + + # Get the BIDS events metadata. + + events_metadata = get_events_metadata(env, args, bids, legacy_db, loris_bids_path, project_id) + + # Copy the `participants.tsv` file rows. 
+ + if loris_bids_path is not None and bids.tsv_participants is not None: + loris_participants_tsv_path = os.path.join(loris_bids_path, 'participants.tsv') + copy_bids_tsv_participants(bids.tsv_participants, loris_participants_tsv_path) + + # Process each session directory. + + import_env = BidsImportEnv( + data_dir_path = data_dir_path, + loris_bids_path = loris_bids_path, + total_files_count = niftis_count, + ) + + for bids_session in bids.sessions: + import_bids_session(env, import_env, args, bids_session, events_metadata, legacy_db) + + # Copy the static BIDS files. + + if loris_bids_path is not None: + copy_static_dataset_files(bids.path, loris_bids_path) + + # Print import summary. + + print_bids_import_summary(env, import_env) + + +def import_bids_session( + env: Env, + import_env: BidsImportEnv, + args: Args, + bids_session: BidsSession, + events_metadata: dict[Any, Any], + legacy_db: Database, +): + """ + Read the provided BIDS session directory and import it into LORIS. + """ + + log(env, f"Importing files for subject '{bids_session.subject.label}' and session '{bids_session.label}'.") + + candidate = try_get_candidate_with_psc_id(env.db, bids_session.subject.label) + if candidate is None: + # This should not happen as BIDS subject labels should have been checked previously. + log_error_exit(env, f"Candidate not found for PSCID '{bids_session.subject.label}'.") + + if bids_session.label is not None: + visit_label = bids_session.label + else: + visit_label = get_default_bids_visit_label_config(env) + + session = try_get_session_with_cand_id_visit_label(env.db, candidate.cand_id, visit_label) + if session is None: + # This should not happen as BIDS session labels should have been checked previously. + log_error_exit(env, f"Visit not found for visit label '{visit_label}'.") + + try: + # Read the scans.tsv property to raise an exception if the file is incorrect. 
+ tsv_scans = bids_session.tsv_scans + + if import_env.loris_bids_path is not None and tsv_scans is not None: + loris_scans_tsv_path = os.path.join( + import_env.loris_bids_path, + f'sub-{bids_session.subject.label}', + f'ses-{bids_session.label}', + f'sub-{bids_session.subject.label}_ses-{bids_session.label}_scans.tsv', + ) + + copy_bids_tsv_scans(tsv_scans, loris_scans_tsv_path) + except Exception as exception: + log_warning( + env, + f"Error while reading the session scans.tsv file, scans.tsv data will be ignored. Full error:\n{exception}" + ) + + # Process each data type directory. + + for data_type in bids_session.data_types: + import_bids_data_type_files(env, import_env, args, session, data_type, events_metadata, legacy_db) + + +def import_bids_data_type_files( + env: Env, + import_env: BidsImportEnv, + args: Args, + session: DbSession, + data_type: BidsDataType, + events_metadata: dict[Any, Any], + legacy_db: Database, +): + """ + Read the provided BIDS data type directory and import it into LORIS. + """ + + if data_type.name in BIDS_MRI_DATA_TYPES: + import_bids_mri_data_type_files(env, import_env, args, session, data_type) + elif data_type.name in BIDS_EEG_DATA_TYPES: + import_bids_eeg_data_type_files(env, import_env, args, session, data_type, events_metadata, legacy_db) + else: + log_warning(env, f"Unknown data type '{data_type.name}'. Skipping.") + + +def import_bids_mri_data_type_files( + env: Env, + import_env: BidsImportEnv, + args: Args, + session: DbSession, + data_type: BidsDataType, +): + """ + Read the BIDS MRI data type directory and import its files into LORIS. 
+ """ + + if args.type == 'derivative': + log_error_exit(env, "Derivative data is not support for BIDS MRI import yet.") + + if not args.copy: + log_error_exit(env, "No copy import is not support for BIDS MRI import yet.") + + for nifti in data_type.niftis: + try: + import_bids_nifti(env, import_env, session, nifti) + except Exception as exception: + import_env.failed_files_count += 1 + log_error( + env, + ( + f"Error while importing MRI file '{nifti.name}'. Error message:\n" + f"{exception}\n" + "Skipping." + ) + ) + + +def import_bids_eeg_data_type_files( + env: Env, + import_env: BidsImportEnv, + args: Args, + session: DbSession, + data_type: BidsDataType, + events_metadata: dict[Any, Any], + legacy_db: Database, +): + """ + Read the provided BIDS EEG data type directory and import it into LORIS. + """ + + loris_data_type_dir_rel_path = os.path.join( + f'sub-{session.candidate.psc_id}', + f'ses-{session.visit_label}', + data_type.name, + ) + + Eeg( + data_type = data_type, + db = legacy_db, + verbose = env.verbose, + data_dir = import_env.data_dir_path, + session = session, + loris_bids_eeg_rel_dir = loris_data_type_dir_rel_path, + loris_bids_root_dir = import_env.loris_bids_path, + dataset_tag_dict = events_metadata, + dataset_type = args.type, + ) + + +def copy_bids_tsv_participants(tsv_participants: dict[str, BidsTsvParticipant], loris_participants_tsv_path: str): + """ + Copy some participants.tsv rows into the LORIS participants.tsv file, creating it if necessary. + """ + + if os.path.exists(loris_participants_tsv_path): + loris_tsv_participants = read_bids_participants_tsv_file(loris_participants_tsv_path) + merge_bids_tsv_participants(tsv_participants, loris_tsv_participants) + + write_bids_participants_tsv_file(tsv_participants, loris_participants_tsv_path) + + +def copy_bids_tsv_scans(tsv_scans: dict[str, BidsTsvScan], loris_scans_tsv_path: str): + """ + Copy some scans.tsv rows into a LORIS scans.tsv file, creating it if necessary. 
+ """ + + if os.path.exists(loris_scans_tsv_path): + loris_tsv_scans = read_bids_scans_tsv_file(loris_scans_tsv_path) + merge_bids_tsv_scans(tsv_scans, loris_tsv_scans) + + write_bids_scans_tsv_file(tsv_scans, loris_scans_tsv_path) + + +def copy_static_dataset_files(source_bids_path: str, loris_bids_path: str): + """ + Copy the static files of the source BIDS dataset to the LORIS BIDS dataset. + """ + + for file_name in ['README', 'dataset_description.json']: + source_file_path = os.path.join(source_bids_path, file_name) + if not os.path.isfile(source_file_path): + continue + + loris_file_path = os.path.join(loris_bids_path, file_name) + shutil.copyfile(source_file_path, loris_file_path) + + +def get_loris_bids_path(env: Env, bids: BidsDataset, data_dir_path: str) -> str: + """ + Get the LORIS BIDS directory path for the BIDS dataset to import, and create that directory if + it does not exist yet. + """ + + try: + dataset_description = bids.get_dataset_description() + except BidsDatasetDescriptionError as error: + log_error_exit(env, str(error)) + + if dataset_description is None: + log_error_exit( + env, + "No file 'dataset_description.json' found in the input BIDS dataset.", + ) + + # Sanitize the dataset metadata to have a usable name for the directory. 
+ dataset_name = re.sub(r'[^0-9a-zA-Z]+', '_', dataset_description.name) + dataset_version = re.sub(r'[^0-9a-zA-Z\.]+', '_', dataset_description.bids_version) + + loris_bids_path = os.path.join(data_dir_path, 'bids_imports', f'{dataset_name}_BIDSVersion_{dataset_version}') + + if not os.path.exists(loris_bids_path): + os.mkdir(loris_bids_path) + + return loris_bids_path diff --git a/python/lib/import_bids_dataset/mri.py b/python/lib/import_bids_dataset/mri.py new file mode 100644 index 000000000..1538645a4 --- /dev/null +++ b/python/lib/import_bids_dataset/mri.py @@ -0,0 +1,224 @@ +import os +import shutil +from typing import Any, cast + +from lib.db.models.mri_scan_type import DbMriScanType +from lib.db.models.session import DbSession +from lib.db.queries.file import try_get_file_with_hash, try_get_file_with_rel_path +from lib.db.queries.mri_scan_type import try_get_mri_scan_type_with_name +from lib.env import Env +from lib.imaging_lib.bids.dataset import BidsNifti +from lib.imaging_lib.bids.json import add_bids_json_file_parameters +from lib.imaging_lib.bids.tsv_scans import add_scan_tsv_file_parameters +from lib.imaging_lib.bids.util import determine_bids_file_type +from lib.imaging_lib.file import register_imaging_file +from lib.imaging_lib.file_parameter import register_file_parameter, register_file_parameters +from lib.imaging_lib.mri_scan_type import create_mri_scan_type +from lib.imaging_lib.nifti import add_nifti_file_parameters +from lib.imaging_lib.nifti_pic import create_imaging_pic +from lib.import_bids_dataset.env import BidsImportEnv +from lib.logging import log, log_warning +from lib.util.crypto import compute_file_blake2b_hash +from lib.util.fs import get_file_extension + +KNOWN_SUFFIXES_PER_MRI_DATA_TYPE = { + 'anat': [ + 'T1w', 'T2w', 'T1rho', 'T1map', 'T2map', 'T2star', 'FLAIR', 'FLASH', 'PD', 'PDmap', 'PDT2', + 'inplaneT1', 'inplaneT2', 'angio', + ], + 'func': [ + 'bold', 'cbv', 'phase', + ], + 'dwi': [ + 'dwi', 'sbref', + ], + 'fmap': [ + 
'phasediff', 'magnitude1', 'magnitude2', 'phase1', 'phase2', 'fieldmap', 'epi', + ], +} + + +def import_bids_nifti(env: Env, import_env: BidsImportEnv, session: DbSession, nifti: BidsNifti): + """ + Import a BIDS NIfTI file and its associated files in LORIS. + """ + + log( + env, + ( + f"Importing MRI file '{nifti.name}'... ({import_env.processed_files_count + 1}" + f" / {import_env.total_files_count})" + ), + ) + + # Get the relevant `scans.tsv` row if there is one. + + tsv_scan = nifti.session.get_tsv_scan(nifti.name) + if tsv_scan is None: + log_warning(env, f"No scans.tsv row found for file '{nifti.name}', scans.tsv data will be ignored.") + + # Get the path at which to copy the file. + + loris_file_dir_path = os.path.join( + cast(str, import_env.loris_bids_path), + f'sub-{session.candidate.psc_id}', + f'ses-{session.visit_label}', + nifti.data_type.name, + ) + + loris_file_path = os.path.join(loris_file_dir_path, nifti.name) + + loris_file_rel_path = os.path.relpath(loris_file_path, import_env.data_dir_path) + + # Check whether the file is already registered in LORIS. + + loris_file = try_get_file_with_rel_path(env.db, loris_file_rel_path) + if loris_file is not None: + import_env.ignored_files_count += 1 + log(env, f"File '{loris_file_rel_path}' is already registered in LORIS. Skipping.") + return + + # Get information about the file. + + file_type = get_check_nifti_imaging_file_type(env, nifti) + file_hash = get_check_nifti_file_hash(env, nifti) + mri_scan_type = get_nifti_mri_scan_type(env, import_env, nifti) + + # Get the auxiliary files. + + aux_file_paths: list[str] = [] + + json_path = nifti.get_json_path() + + bval_path = nifti.get_bval_path() + if bval_path is not None: + aux_file_paths.append(bval_path) + + bvec_path = nifti.get_bvec_path() + if bvec_path is not None: + aux_file_paths.append(bvec_path) + + # Get the file parameters. 
+ + file_parameters: dict[str, Any] = {} + + if json_path is not None: + json_loris_path = os.path.join(loris_file_dir_path, os.path.basename(json_path)) + json_loris_rel_path = os.path.relpath(json_loris_path, import_env.data_dir_path) + add_bids_json_file_parameters(env, json_path, json_loris_rel_path, file_parameters) + + add_nifti_file_parameters(nifti.path, file_hash, file_parameters) + + if nifti.session.tsv_scans_path is not None and tsv_scan is not None: + add_scan_tsv_file_parameters(tsv_scan, nifti.session.tsv_scans_path, file_parameters) + + for aux_file_path in aux_file_paths: + aux_file_type = get_file_extension(aux_file_path) + aux_file_hash = compute_file_blake2b_hash(aux_file_path) + aux_file_loris_path = os.path.join(loris_file_dir_path, os.path.basename(aux_file_path)) + aux_file_loris_rel_path = os.path.relpath(aux_file_loris_path, import_env.data_dir_path) + file_parameters[f'bids_{aux_file_type}'] = aux_file_loris_rel_path + file_parameters[f'bids_{aux_file_type}_blake2b_hash'] = aux_file_hash + + # Copy the files on the file system. + + copy_bids_file(loris_file_dir_path, nifti.path) + + if json_path is not None: + copy_bids_file(loris_file_dir_path, json_path) + + for aux_file_path in aux_file_paths: + copy_bids_file(loris_file_dir_path, aux_file_path) + + # Register the file and its parameters in the database. + + echo_time = file_parameters.get('EchoTime') + echo_number = file_parameters.get('EchoNumber') + phase_encoding_direction = file_parameters.get('PhaseEncodingDirection') + + file = register_imaging_file( + env, + file_type, + loris_file_rel_path, + session, + mri_scan_type, + echo_time, + echo_number, + phase_encoding_direction, + ) + + register_file_parameters(env, file, file_parameters) + + # Create and register the file picture. 
+
+    pic_rel_path = create_imaging_pic(env, file, True if 'time' in file_parameters else False)
+
+    register_file_parameter(env, file, 'check_pic_filename', pic_rel_path)
+
+    import_env.imported_files_count += 1
+
+
+def get_check_nifti_imaging_file_type(env: Env, nifti: BidsNifti) -> str:
+    """
+    Get the BIDS file type of a NIfTI file and raise an exception if that file type is not
+    registered in the database.
+    """
+
+    file_type = determine_bids_file_type(env, nifti.name)
+    if file_type is None:
+        raise Exception("No matching file type found in the database.")
+
+    return file_type
+
+
+def get_check_nifti_file_hash(env: Env, nifti: BidsNifti) -> str:
+    """
+    Compute the BLAKE2b hash of a NIfTI file and raise an exception if that hash is already
+    registered in the database.
+    """
+
+    file_hash = compute_file_blake2b_hash(nifti.path)
+
+    file = try_get_file_with_hash(env.db, file_hash)
+    if file is not None:
+        raise Exception(f"File with hash '{file_hash}' already present in the database.")
+
+    return file_hash
+
+
+def get_nifti_mri_scan_type(env: Env, import_env: BidsImportEnv, nifti: BidsNifti) -> DbMriScanType | None:
+    """
+    Get the MRI scan type corresponding to a NIfTI file using its BIDS suffix. Create the MRI scan
+    type in the database if the suffix is a standard BIDS suffix and the scan type does not already
+    exist in the database, or raise an exception if no known scan type is found.
+ """ + + if nifti.suffix is None: + raise Exception("No BIDS suffix found in the NIfTI file name, cannot infer the file data type.") + + mri_scan_type = try_get_mri_scan_type_with_name(env.db, nifti.suffix) + if mri_scan_type is not None: + return mri_scan_type + + if nifti.suffix not in KNOWN_SUFFIXES_PER_MRI_DATA_TYPE[nifti.data_type.name]: + if nifti.suffix not in import_env.unknown_scan_types: + import_env.unknown_scan_types.append(nifti.suffix) + + raise Exception(f"Found unknown MRI file suffix '{nifti.suffix}'.") + + return create_mri_scan_type(env, nifti.suffix) + + +def copy_bids_file(loris_file_dir_path: str, file_path: str): + """ + Copy a BIDS file to a directory. + """ + + file_name = os.path.basename(file_path) + loris_file_path = os.path.join(loris_file_dir_path, file_name) + + if os.path.exists(loris_file_path): + raise Exception(f"File '{loris_file_path}' already exists in LORIS.") + + os.makedirs(loris_file_dir_path, exist_ok=True) + shutil.copyfile(file_path, loris_file_path) diff --git a/python/lib/import_bids_dataset/print.py b/python/lib/import_bids_dataset/print.py new file mode 100644 index 000000000..8ae1fb936 --- /dev/null +++ b/python/lib/import_bids_dataset/print.py @@ -0,0 +1,30 @@ +from lib.env import Env +from lib.import_bids_dataset.env import BidsImportEnv +from lib.logging import log + + +def print_bids_import_summary(env: Env, import_env: BidsImportEnv): + """ + Print a summary of this BIDS import process. + """ + + log( + env, + ( + f"Processed {import_env.processed_files_count} MRI files, including {import_env.imported_files_count}" + f" imported files, {import_env.ignored_files_count} ignored files, and {import_env.failed_files_count}" + " errors." 
+        ),
+    )
+
+    if import_env.unknown_scan_types != []:
+        import_env.unknown_scan_types.sort()
+
+        unknown_scan_types_string = ""
+        for unknown_scan_type in import_env.unknown_scan_types:
+            unknown_scan_types_string += f"\n- {unknown_scan_type}"
+
+        log(
+            env,
+            f"Found {len(import_env.unknown_scan_types)} unknown MRI scan types:{unknown_scan_types_string}"
+        )
diff --git a/python/lib/mri.py b/python/lib/mri.py
deleted file mode 100644
index 03259b5ca..000000000
--- a/python/lib/mri.py
+++ /dev/null
@@ -1,455 +0,0 @@
-"""Deals with MRI BIDS datasets and register them into the database."""
-
-import getpass
-import json
-import os
-import re
-import sys
-
-import lib.exitcode
-import lib.utilities as utilities
-from lib.candidate import Candidate
-from lib.imaging import Imaging
-from lib.scanstsv import ScansTSV
-from lib.session import Session
-from lib.util.crypto import compute_file_blake2b_hash
-
-
-class Mri:
-    """
-    This class reads the BIDS MRI data structure and registers the MRI datasets into the
-    database by calling lib.imaging class.
- - :Example: - - from lib.bidsreader import BidsReader - from lib.mri import Mri - from lib.database import Database - - # database connection - db = Database(config_file.mysql, verbose) - db.connect() - - # grep config settings from the Config module - config_obj = Config(db, verbose) - default_bids_vl = config_obj.get_config('default_bids_vl') - data_dir = config_obj.get_config('dataDirBasepath') - - # load the BIDS directory - bids_reader = BidsReader(bids_dir) - - # create the LORIS_BIDS directory in data_dir based on Name and BIDS version - loris_bids_root_dir = create_loris_bids_directory( - bids_reader, data_dir, verbose - ) - for row in bids_reader.cand_session_modalities_list: - for modality in row['modalities']: - if modality in ['anat', 'dwi', 'fmap', 'func']: - bids_session = row['bids_ses_id'] - visit_label = bids_session if bids_session else default_bids_vl - loris_bids_mri_rel_dir = "sub-" + row['bids_sub_id'] + "/" + \ - "ses-" + visit_label + "/mri/" - lib.utilities.create_dir( - loris_bids_root_dir + loris_bids_mri_rel_dir, verbose - ) - Eeg( - bids_reader = bids_reader, - bids_sub_id = row['bids_sub_id'], - bids_ses_id = row['bids_ses_id'], - bids_modality = modality, - db = db, - verbose = verbose, - data_dir = data_dir, - default_visit_label = default_bids_vl, - loris_bids_eeg_rel_dir = loris_bids_mri_rel_dir, - loris_bids_root_dir = loris_bids_root_dir - ) - - # disconnect from the database - db.disconnect() - """ - - def __init__(self, bids_reader, bids_sub_id, bids_ses_id, bids_modality, db, - verbose, data_dir, default_visit_label, - loris_bids_mri_rel_dir, loris_bids_root_dir): - - # enumerate the different suffixes supported by BIDS per modality type - self.possible_suffix_per_modality = { - 'anat' : [ - 'T1w', 'T2w', 'T1rho', 'T1map', 'T2map', 'T2star', 'FLAIR', - 'FLASH', 'PD', 'PDmap', 'PDT2', 'inplaneT1', 'inplaneT2', 'angio' - ], - 'func' : [ - 'bold', 'cbv', 'phase' - ], - 'dwi' : [ - 'dwi', 'sbref' - ], - 'fmap' : [ - 'phasediff', 
'magnitude1', 'magnitude2', 'phase1', 'phase2', - 'fieldmap', 'epi' - ] - } - - # load bids objects - self.bids_reader = bids_reader - self.bids_layout = bids_reader.bids_layout - - # load the LORIS BIDS import root directory where the files will be copied - self.loris_bids_mri_rel_dir = loris_bids_mri_rel_dir - self.loris_bids_root_dir = loris_bids_root_dir - self.data_dir = data_dir - - # load BIDS subject, visit and modality - self.bids_sub_id = bids_sub_id - self.bids_ses_id = bids_ses_id - self.bids_modality = bids_modality - - # load database handler object and verbose bool - self.db = db - self.verbose = verbose - - # find corresponding CandID and SessionID in LORIS - self.loris_cand_info = self.get_loris_cand_info() - self.default_vl = default_visit_label - self.psc_id = self.loris_cand_info['PSCID'] - self.cand_id = self.loris_cand_info['CandID'] - self.center_id = self.loris_cand_info['RegistrationCenterID'] - self.project_id = self.loris_cand_info['RegistrationProjectID'] - self.cohort_id = None - for row in bids_reader.participants_info: - if not row['participant_id'] == self.psc_id: - continue - if 'cohort' in row: - cohort_info = db.pselect( - "SELECT CohortID FROM cohort WHERE title = %s", - [row['cohort'], ] - ) - if len(cohort_info) > 0: - self.cohort_id = cohort_info[0]['CohortID'] - break - - self.session_id = self.get_loris_session_id() - - # grep all the NIfTI files for the modality - self.nifti_files = self.grep_nifti_files() - - # check if a tsv with acquisition dates or age is available for the subject - self.scans_file = None - if self.bids_layout.get(suffix='scans', subject=self.psc_id, return_type='filename'): - self.scans_file = self.bids_layout.get(suffix='scans', subject=self.psc_id, - return_type='filename', extension='tsv')[0] - - # loop through NIfTI files and register them in the DB - for nifti_file in self.nifti_files: - self.register_raw_file(nifti_file) - - def get_loris_cand_info(self): - """ - Gets the LORIS Candidate info for 
the BIDS subject. - - :return: Candidate info of the subject found in the database - :rtype: list - """ - - candidate = Candidate(verbose=self.verbose, psc_id=self.bids_sub_id) - loris_cand_info = candidate.get_candidate_info_from_loris(self.db) - - return loris_cand_info - - def get_loris_session_id(self): - """ - Greps the LORIS session.ID corresponding to the BIDS visit. Note, - if no BIDS visit are set, will use the default visit label value set - in the config module - - :return: the session's ID in LORIS - :rtype: int - """ - - # check if there are any visit label in BIDS structure, if not, - # will use the default visit label set in the config module - visit_label = self.bids_ses_id if self.bids_ses_id else self.default_vl - - session = Session( - self.db, self.verbose, self.cand_id, visit_label, - self.center_id, self.project_id, self.cohort_id - ) - loris_vl_info = session.get_session_info_from_loris() - - if not loris_vl_info: - message = "ERROR: visit label " + visit_label + "does not exist in " + \ - "the session table for candidate " + self.cand_id + \ - "\nPlease make sure the visit label is created in the " + \ - "database or run bids_import.py with the -s option -s if " + \ - "you wish that the insertion pipeline creates the visit " + \ - "label in the session table." - print(message) - exit(lib.exitcode.SELECT_FAILURE) - - return loris_vl_info['ID'] - - def grep_nifti_files(self): - """ - Returns the list of NIfTI files found for the modality. 
- - :return: list of NIfTI files found for the modality - :rtype: list - """ - - # grep all the possible suffixes for the modality - modality_possible_suffix = self.possible_suffix_per_modality[self.bids_modality] - - # loop through the possible suffixes and grep the NIfTI files - nii_files_list = [] - for suffix in modality_possible_suffix: - nii_files_list.extend(self.grep_bids_files(suffix, 'nii.gz')) - - # return the list of found NIfTI files - return nii_files_list - - def grep_bids_files(self, bids_type, extension): - """ - Greps the BIDS files and their layout information from the BIDSLayout - and return that list. - - :param bids_type: the BIDS type to use to grep files (T1w, T2w, bold, dwi...) - :type bids_type: str - :param extension: extension of the file to look for (nii.gz, json...) - :type extension: str - - :return: list of files from the BIDS layout - :rtype: list - """ - - if self.bids_ses_id: - return self.bids_layout.get( - subject = self.bids_sub_id, - session = self.bids_ses_id, - datatype = self.bids_modality, - extension = extension, - suffix = bids_type - ) - else: - return self.bids_layout.get( - subject = self.bids_sub_id, - datatype = self.bids_modality, - extension = extension, - suffix = bids_type - ) - - def register_raw_file(self, nifti_file): - """ - Registers raw MRI files and related files into the files and parameter_file tables. - - :param nifti_file: NIfTI file object - :type nifti_file: pybids NIfTI file object - """ - - # insert the NIfTI file - self.fetch_and_insert_nifti_file(nifti_file) - - def fetch_and_insert_nifti_file(self, nifti_file, derivatives=None): - """ - Gather NIfTI file information to insert into the files and parameter_file tables. - Once all the information has been gathered, it will call imaging.insert_imaging_file - that will perform the insertion into the files and parameter_file tables. 
- - :param nifti_file : NIfTI file object - :type nifti_file : pybids NIfTI file object - :param derivatives: whether the file to be registered is a derivative file - :type derivatives: bool - - :return: dictionary with the inserted file_id and file_path - :rtype: dict - """ - - # load the Imaging object that will be used to insert the imaging data into the database - imaging = Imaging(self.db, self.verbose) - - # load the list of associated files with the NIfTI file - associated_files = nifti_file.get_associations() - - # load the entity information from the NIfTI file - entities = nifti_file.get_entities() - scan_type = entities['suffix'] - - # loop through the associated files to grep JSON, bval, bvec... - json_file = None - other_assoc_files = {} - for assoc_file in associated_files: - file_info = assoc_file.get_entities() - if re.search(r'json$', file_info['extension']): - json_file = assoc_file.path - elif re.search(r'bvec$', file_info['extension']): - other_assoc_files['bvec_file'] = assoc_file.path - elif re.search(r'bval$', file_info['extension']): - other_assoc_files['bval_file'] = assoc_file.path - elif re.search(r'tsv$', file_info['extension']) and file_info['suffix'] == 'events': - other_assoc_files['task_file'] = assoc_file.path - elif re.search(r'tsv$', file_info['extension']) and file_info['suffix'] == 'physio': - other_assoc_files['physio_file'] = assoc_file.path - - # read the json file if it exists - file_parameters = {} - if json_file: - with open(json_file) as data_file: - file_parameters = json.load(data_file) - file_parameters = imaging.map_bids_param_to_loris_param(file_parameters) - # copy the JSON file to the LORIS BIDS import directory - json_path = self.copy_file_to_loris_bids_dir(json_file) - file_parameters['bids_json_file'] = json_path - json_blake2 = compute_file_blake2b_hash(json_file) - file_parameters['bids_json_file_blake2b_hash'] = json_blake2 - - # grep the file type from the ImagingFileTypes table - file_type = 
imaging.determine_file_type(nifti_file.filename) - if not file_type: - message = "\nERROR: File type for " + nifti_file.filename \ - + " does not exist in ImagingFileTypes database table\n" - print(message) - sys.exit(lib.exitcode.SELECT_FAILURE) - - # determine the output type - output_type = 'derivatives' if derivatives else 'native' - if not derivatives: - coordinate_space = 'native' - - # get the acquisition date of the MRI or the age at the time of acquisition - if self.scans_file: - scan_info = ScansTSV(self.scans_file, nifti_file.filename, self.verbose) - file_parameters['scan_acquisition_time'] = scan_info.get_acquisition_time() - file_parameters['age_at_scan'] = scan_info.get_age_at_scan() - # copy the scans.tsv file to the LORIS BIDS import directory - scans_path = scan_info.copy_scans_tsv_file_to_loris_bids_dir( - self.bids_sub_id, self.loris_bids_root_dir, self.data_dir - ) - file_parameters['scans_tsv_file'] = scans_path - scans_blake2 = compute_file_blake2b_hash(self.scans_file) - file_parameters['scans_tsv_file_bake2hash'] = scans_blake2 - - # grep voxel step from the NIfTI file header - step_parameters = imaging.get_nifti_image_step_parameters(nifti_file.path) - file_parameters['xstep'] = step_parameters[0] - file_parameters['ystep'] = step_parameters[1] - file_parameters['zstep'] = step_parameters[2] - - # grep the time length from the NIfTI file header - is_4d_dataset = False - length_parameters = imaging.get_nifti_image_length_parameters(nifti_file.path) - if len(length_parameters) == 4: - file_parameters['time'] = length_parameters[3] - is_4d_dataset = True - - # add all other associated files to the file_parameters so they get inserted - # in parameter_file - for type in other_assoc_files: - original_file_path = other_assoc_files[type] - copied_path = self.copy_file_to_loris_bids_dir(original_file_path) - file_param_name = 'bids_' + type - file_parameters[file_param_name] = copied_path - file_blake2 = 
compute_file_blake2b_hash(original_file_path) - hash_param_name = file_param_name + '_blake2b_hash' - file_parameters[hash_param_name] = file_blake2 - - # append the blake2b to the MRI file parameters dictionary - blake2 = compute_file_blake2b_hash(nifti_file.path) - file_parameters['file_blake2b_hash'] = blake2 - - # check that the file is not already inserted before inserting it - result = imaging.grep_file_info_from_hash(blake2) - file_id = result['FileID'] if result else None - file_path = result['File'] if result else None - if not file_id: - # grep the scan type ID from the mri_scan_type table (if it is not already in - # the table, it will add a row to the mri_scan_type table) - scan_type_id = self.db.grep_id_from_lookup_table( - id_field_name = 'MriScanTypeID', - table_name = 'mri_scan_type', - where_field_name = 'MriScanTypeName', - where_value = scan_type, - insert_if_not_found = True - ) - - # copy the NIfTI file to the LORIS BIDS import directory - file_path = self.copy_file_to_loris_bids_dir(nifti_file.path) - - # insert the file along with its information into files and parameter_file tables - echo_time = file_parameters['EchoTime'] if 'EchoTime' in file_parameters.keys() else None - echo_nb = file_parameters['EchoNumber'] if 'EchoNumber' in file_parameters.keys() else None - phase_enc_dir = file_parameters['PhaseEncodingDirection'] \ - if 'PhaseEncodingDirection' in file_parameters.keys() else None - file_info = { - 'FileType' : file_type, - 'File' : file_path, - 'SessionID' : self.session_id, - 'InsertedByUserID': getpass.getuser(), - 'CoordinateSpace' : coordinate_space, - 'OutputType' : output_type, - 'EchoTime' : echo_time, - 'PhaseEncodingDirection': phase_enc_dir, - 'EchoNumber' : echo_nb, - 'SourceFileID' : None, - 'MriScanTypeID' : scan_type_id - } - file_id = imaging.insert_imaging_file(file_info, file_parameters) - - # create the pic associated with the file - pic_rel_path = imaging.create_imaging_pic( - { - 'cand_id' : self.cand_id, - 
'data_dir_path': self.data_dir, - 'file_rel_path': file_path, - 'is_4D_dataset': is_4d_dataset, - 'file_id' : file_id - } - ) - if os.path.exists(os.path.join(self.data_dir, 'pic/', pic_rel_path)): - imaging.insert_parameter_file(file_id, 'check_pic_filename', pic_rel_path) - - return {'file_id': file_id, 'file_path': file_path} - - def copy_file_to_loris_bids_dir(self, file, derivatives_path=None): - """ - Wrapper around the utilities.copy_file function that copies the file - to the LORIS BIDS import directory and returns the relative path of the - file (without the data_dir part). - - :param file: full path to the original file - :type file: str - :param derivatives_path: path to the derivative folder - :type derivatives_path: str - - :return: relative path to the copied file - :rtype: str - """ - - # determine the path of the copied file - copy_file = self.loris_bids_mri_rel_dir - if self.bids_ses_id: - copy_file += os.path.basename(file) - else: - # make sure the ses- is included in the new filename if using - # default visit label from the LORIS config - copy_file += str.replace( - os.path.basename(file), - "sub-" + self.bids_sub_id, - "sub-" + self.bids_sub_id + "_ses-" + self.default_vl - ) - if derivatives_path: - # create derivative subject/vl/modality directory - lib.utilities.create_dir( - derivatives_path + self.loris_bids_mri_rel_dir, - self.verbose - ) - copy_file = derivatives_path + copy_file - else: - copy_file = self.loris_bids_root_dir + copy_file - - # copy the file - utilities.copy_file(file, copy_file, self.verbose) - - # determine the relative path and return it - relative_path = copy_file.replace(self.data_dir, "") - - return relative_path diff --git a/python/lib/scanstsv.py b/python/lib/scanstsv.py deleted file mode 100644 index 5cb938615..000000000 --- a/python/lib/scanstsv.py +++ /dev/null @@ -1,128 +0,0 @@ -"""Deals with sub-XXX_scans.tsv BIDS files""" - -import os - -from dateutil.parser import parse - -import lib -import lib.utilities 
as utilities - - -class ScansTSV: - """ - This class reads the BIDS sub-XXX_scans.tsv file that includes acquisition level information - such as scan date or age at scan... - - :Example: - from lib.scanstsv import ScansTSV - - scan_info = ScansTSV(scans_tsv_file, acquisition_file) - - acquisition_time = scan_info.get_acquisition_time() - age_at_scan = scan_info.get_age_at_scan - - """ - - def __init__(self, scans_tsv_file, acquisition_file, verbose): - """ - Constructor method for the ScansTSV class - - :param scans_tsv_file : path to the BIDS sub-XXX_scans.tsv file - :type scans_tsv_file : str - :param acquisition_file: path to the acquisition file (.nii, .set, .edf...) - :type acquisition_file: str - """ - - self.verbose = verbose - - # store files' paths - self.scans_tsv_file = scans_tsv_file - self.acquisition_file = acquisition_file - - # read the TSV file and store the header names and data - self.tsv_entries = utilities.read_tsv_file(self.scans_tsv_file) - self.tsv_headers = self.tsv_entries[0] - - # get the acquisition information for the acquisition file - self.acquisition_data = self.find_acquisition_data() - - def find_acquisition_data(self): - """ - Gets the information for the acquisition file from the TSV file. - - :return: the acquisition information found in the TSV file for the acquisition file - :rtype: list - """ - - for entry in self.tsv_entries: - if os.path.basename(self.acquisition_file) in entry['filename']: - return entry - - def get_acquisition_time(self): - """ - Get the acquisition time of the acquisition file. 
- - :return: acquisition time or None if not found - :rtype: str - """ - - if not self.acquisition_data: - # if no entry in self.acquisition_data, then no information available to get the acquisition time - return None - - if 'acq_time' in self.acquisition_data: - acq_time_list = [ele for ele in self.tsv_entries if ele['filename'] in self.acquisition_file] - if len(acq_time_list) == 1: - # the variable name could be mri_acq_time, but is eeg originally. - eeg_acq_time = acq_time_list[0]['acq_time'] - else: - print('More than one or no acquisition time has been found for ', self.acquisition_file) - exit() - - if eeg_acq_time == 'n/a': - return None - - try: - eeg_acq_time = parse(eeg_acq_time) - except ValueError as e: - message = "ERROR: could not convert acquisition time '" + \ - eeg_acq_time + \ - "' to datetime: " + str(e) - print(message) - exit(lib.exitcode.PROGRAM_EXECUTION_FAILURE) - return eeg_acq_time - - return None - - def get_age_at_scan(self): - """ - Get the age at the time of acquisition. 
- - :return: age at acquisition time - :rtype: str - """ - - # list of possible header names containing the age information - age_header_list = ['age', 'age_at_scan', 'age_acq_time'] - - for header_name in age_header_list: - if header_name in self.tsv_headers and self.acquisition_data: - return self.acquisition_data[header_name].strip() - - return None - - def copy_scans_tsv_file_to_loris_bids_dir(self, bids_sub_id, loris_bids_root_dir, data_dir): - - original_file_path = self.scans_tsv_file - final_file_path = loris_bids_root_dir + '/sub-' + bids_sub_id + '/' + os.path.basename(self.scans_tsv_file) - - # copy the scans.tsv file to the new directory - if os.path.exists(final_file_path): - lib.utilities.append_to_tsv_file(original_file_path, final_file_path, "filename", self.verbose) - else: - lib.utilities.copy_file(original_file_path, final_file_path, self.verbose) - - # determine the relative path and return it - relative_path = final_file_path.replace(data_dir, '') - - return relative_path diff --git a/python/lib/session.py b/python/lib/session.py deleted file mode 100644 index dd34aa954..000000000 --- a/python/lib/session.py +++ /dev/null @@ -1,228 +0,0 @@ -"""This class gather functions for session handling.""" - -from typing_extensions import deprecated - -from lib.database_lib.candidate_db import CandidateDB -from lib.database_lib.project_cohort_rel import ProjectCohortRel -from lib.database_lib.session_db import SessionDB -from lib.database_lib.site import Site - - -class Session: - """ - This class gather functions that interact with the database and allow session - creation or to fetch session information directly from the database. 
- - :Example: - - from lib.session import Session - from lib.database import Database - - # database connection - db = Database(config.mysql, verbose) - db.connect() - - session = Session( - verbose, cand_id, visit_label, - center_id, project_id, cohort_id - ) - - # grep session information from the database - loris_vl_info = session.get_session_info_from_loris(db) - - # insert the session into the database - loris_vl_info = session.create_session(db) - - # disconnect from the database - db.disconnect() - """ - - def __init__(self, db, verbose, cand_id=None, visit_label=None, - center_id=None, project_id=None, cohort_id=None): - """ - Constructor method for the Session class. - - :param verbose : whether to be verbose - :type verbose : bool - :param cand_id : candidate's CandID - :type cand_id : int - :param visit_label : visit label - :type visit_label : str - :param center_id : center ID to associate with the session - :type center_id : int - :param project_id : project ID to associate with the session - :type project_id : int - :param cohort_id: cohort ID to associate with the session - :type cohort_id: int - """ - self.db = db - self.verbose = verbose - - self.proj_cohort_rel_db_obj = ProjectCohortRel(db, verbose) - self.candidate_db_obj = CandidateDB(db, verbose) - self.session_db_obj = SessionDB(db, verbose) - self.site_db_obj = Site(db, verbose) - - self.cand_id = str(cand_id) - self.visit_label = visit_label - self.center_id = center_id - self.project_id = project_id - self.cohort_id = cohort_id - - self.proj_cohort_rel_info_dict = dict() - self.session_info_dict = dict() - self.session_id = None - - def create_session(self): - """ - Creates a session using BIDS information. - - :param db: database handler object - :type db: object - - :return: dictionary with session info from the session's table - :rtype: dict - """ - # TODO refactor bids_import pipeline to use same functions as dcm2bids below. 
To be done in different PR though - if self.verbose: - print("Creating visit " + self.visit_label - + " for CandID " + self.cand_id) - - # fetch the candidate.ID associated to the CandID first - candidate_id = self.candidate_db_obj.get_candidate_id(self.cand_id) - column_names = ('CandidateID', 'Visit_label', 'CenterID', 'Current_stage') - values = (candidate_id, self.visit_label, str(self.center_id), 'Not Started') - - if self.project_id: - column_names = (*column_names, 'ProjectID') - values = (*values, str(self.project_id)) - - if self.cohort_id: - column_names = (*column_names, 'CohortID') - values = (*values, str(self.cohort_id)) - - self.db.insert( - table_name='session', - column_names=column_names, - values=values - ) - - loris_session_info = self.get_session_info_from_loris() - - return loris_session_info - - def get_session_info_from_loris(self): - """ - Grep session information from the session table using CandID and - Visit_label. - - :param db: database handler object - :type db: object - - :return: dictionary with session info from the session's table - :rtype: dict - """ - # TODO refactor bids_import pipeline to use same functions as dcm2bids below. To be done in different PR though - loris_session_info = self.db.pselect( - """ - SELECT PSCID, CandID, session.* - FROM session - JOIN candidate ON (candidate.ID=session.CandidateID) - WHERE CandID = %s AND Visit_label = %s - """, - (self.cand_id, self.visit_label) - ) - - return loris_session_info[0] if loris_session_info else None - - @deprecated('Use `lib.db.queries.site.try_get_site_with_psc_id_visit_label` instead') - def get_session_center_info(self, pscid, visit_label): - """ - Get the session center information based on the PSCID and visit label of a session. 
- - :param pscid: candidate site ID (PSCID) - :type pscid: str - :param visit_label: visit label - :type visit_label: str - - :return: dictionary of site information for the visit/candidate queried - :rtype: dict - """ - return self.session_db_obj.get_session_center_info(pscid, visit_label) - - @deprecated('Use `lib.db.queries.try_get_candidate_with_cand_id_visit_label` instead') - def create_session_dict(self, cand_id, visit_label): - """ - Creates the session information dictionary based on a candidate ID and visit label. This will populate - self.session_info_dict based on the result returned from the database query. - - :param cand_id: CandID - :type cand_id: int - :param visit_label: Visit label of the session - :type visit_label: str - """ - self.session_info_dict = self.session_db_obj.create_session_dict(cand_id, visit_label) - if self.session_info_dict: - self.cand_id = self.session_info_dict['CandID'] - self.visit_label = self.session_info_dict['Visit_label'] - self.center_id = self.session_info_dict['CenterID'] - self.project_id = self.session_info_dict['ProjectID'] - self.cohort_id = self.session_info_dict['CohortID'] - self.session_id = self.session_info_dict['ID'] - - @deprecated('Use `lib.db.models.session.DbSession` instead') - def insert_into_session(self, session_info_to_insert_dict): - """ - Insert a new row in the session table using fields list as column names and values as values. 
- - :param session_info_to_insert_dict: dictionary with the column names and values to use for insertion - :type session_info_to_insert_dict: dict - - :return: ID of the new session registered - :rtype: int - """ - self.session_id = self.session_db_obj.insert_into_session( - fields=list(session_info_to_insert_dict.keys()), - values=list(session_info_to_insert_dict.values()) - ) - - return self.session_id - - @deprecated('Use `lib.get_subject_session.get_candidate_next_visit_number` instead') - def get_next_session_site_id_and_visit_number(self, cand_id): - """ - Determines the next session site and visit number based on the last session inserted for a given candidate. - - :param cand_id: candidate ID - :type cand_id: int - - :return: a dictionary with 'newVisitNo' and 'CenterID' keys/values - :rtype: dict - """ - return self.session_db_obj.determine_next_session_site_id_and_visit_number(cand_id) - - @deprecated('Use `lib.db.queries.site.get_all_sites` instead') - def get_list_of_sites(self): - """ - Get the list of sites available in the psc table. - - :return: list of sites - :rtype: list - """ - - return self.site_db_obj.get_list_of_sites() - - @deprecated('Use `lib.db.models.project_cohort.DbProjectCohort` instead') - def create_proj_cohort_rel_info_dict(self, project_id, cohort_id): - """ - Populate self.proj_cohort_rel_info_dict with the content returned from the database for the ProjectID and - CohortID. 
- - :param project_id: ID of the Project - :type project_id: int - :param cohort_id: ID of the Cohort - :type cohort_id: int - """ - self.proj_cohort_rel_info_dict = self.proj_cohort_rel_db_obj.create_proj_cohort_rel_dict( - project_id, cohort_id - ) diff --git a/python/scripts/bids_import.py b/python/scripts/bids_import.py deleted file mode 100755 index 3c43940c4..000000000 --- a/python/scripts/bids_import.py +++ /dev/null @@ -1,610 +0,0 @@ -#!/usr/bin/env python - -"""Script to import BIDS structure into LORIS.""" - -import getopt -import json -import os -import re -import sys - -import lib.exitcode -import lib.physiological -import lib.utilities -from lib.bidsreader import BidsReader -from lib.candidate import Candidate -from lib.config_file import load_config -from lib.database import Database -from lib.database_lib.config import Config -from lib.eeg import Eeg -from lib.mri import Mri -from lib.session import Session -from lib.util.crypto import compute_file_blake2b_hash - -sys.path.append('/home/user/python') - - -# to limit the traceback when raising exceptions. 
-# sys.tracebacklimit = 0 - -def main(): - bids_dir = '' - verbose = False - createcand = False - createvisit = False - idsvalidation = False - nobidsvalidation = False - type = None - profile = None - nocopy = False - - long_options = [ - "help", "profile=", "directory=", - "createcandidate", "createsession", "idsvalidation", - "nobidsvalidation", "nocopy", "type=", - "verbose" - ] - usage = ( - '\n' - 'usage : bids_import -d -p \n\n' - 'options: \n' - '\t-p, --profile : name of the python database config file in the config directory\n' - '\t-d, --directory : BIDS directory to parse & insert into LORIS\n' - 'If directory is within $data_dir/assembly_bids, no copy will be performed' - '\t-c, --createcandidate : to create BIDS candidates in LORIS (optional)\n' - '\t-s, --createsession : to create BIDS sessions in LORIS (optional)\n' - '\t-i, --idsvalidation : to validate BIDS directory for a matching pscid/candid pair (optional)\n' - '\t-b, --nobidsvalidation : to disable BIDS validation for BIDS compliance\n' - '\t-a, --nocopy : to disable dataset copy in data assembly_bids\n' - '\t-t, --type : raw | derivative. Specify the dataset type.' 
- 'If not set, the pipeline will look for both raw and derivative files.\n' - 'Required if no dataset_description.json is found.\n' - '\t-v, --verbose : be verbose\n' - ) - - try: - opts, _ = getopt.getopt(sys.argv[1:], 'hp:d:csinat:v', long_options) - except getopt.GetoptError: - print(usage) - sys.exit(lib.exitcode.GETOPT_FAILURE) - - for opt, arg in opts: - if opt in ('-h', '--help'): - print(usage) - sys.exit() - elif opt in ('-p', '--profile'): - profile = arg - elif opt in ('-d', '--directory'): - bids_dir = arg - elif opt in ('-v', '--verbose'): - verbose = True - elif opt in ('-c', '--createcandidate'): - createcand = True - elif opt in ('-s', '--createsession'): - createvisit = True - elif opt in ('-i', '--idsvalidation'): - idsvalidation = True - elif opt in ('-n', '--nobidsvalidation'): - nobidsvalidation = True - elif opt in ('-a', '--nocopy'): - nocopy = True - elif opt in ('-t', '--type'): - type = arg - - # input error checking and load config_file file - config_file = load_config(profile) - input_error_checking(bids_dir, usage) - - dataset_json = bids_dir + "/dataset_description.json" - if not os.path.isfile(dataset_json) and not type: - print('No dataset_description.json found. 
Please run with the --type option.') - print(usage) - sys.exit(lib.exitcode.MISSING_ARG) - - if type and type not in ('raw', 'derivative'): - print("--type must be one of 'raw', 'derivative'") - print(usage) - sys.exit(lib.exitcode.MISSING_ARG) - - # database connection - db = Database(config_file.mysql, verbose) - db.connect() - - config_obj = Config(db, verbose) - data_dir = config_obj.get_config('dataDirBasepath') - # making sure that there is a final / in data_dir - data_dir = data_dir if data_dir.endswith('/') else data_dir + "/" - - # read and insert BIDS data - read_and_insert_bids( - bids_dir, - data_dir, - verbose, - createcand, - createvisit, - idsvalidation, - nobidsvalidation, - type, - nocopy, - db - ) - - -def input_error_checking(bids_dir, usage): - """ - Checks whether the required inputs are set and that paths are valid. - - :param bids_dir: path to the BIDS directory to parse and insert into LORIS - :type bids_dir: str - :param usage : script usage to be displayed when encountering an error - :type usage : st - """ - - if not bids_dir: - message = '\n\tERROR: you must specify a BIDS directory using -d or ' \ - '--directory option' - print(message) - print(usage) - sys.exit(lib.exitcode.MISSING_ARG) - - if not os.path.isdir(bids_dir): - message = '\n\tERROR: you must specify a valid BIDS directory.\n' + \ - bids_dir + ' does not exist!' - print(message) - print(usage) - sys.exit(lib.exitcode.INVALID_PATH) - - -def read_and_insert_bids( - bids_dir, data_dir, verbose, createcand, createvisit, - idsvalidation, nobidsvalidation, type, nocopy, db -): - """ - Read the provided BIDS structure and import it into the database. 
- - :param bids_dir : path to the BIDS directory - :type bids_dir : str - :param data_dir : data_dir config value - :type data_dir : string - :param verbose : flag for more printing if set - :type verbose : bool - :param createcand : allow database candidate creation if it did not exist already - :type createcand : bool - :param createvisit : allow database visit creation if it did not exist already - :type createvisit : bool - :param idsvalidation : allow pscid/candid validation in the BIDS directory name - :type idsvalidation : bool - :param nobidsvalidation : disable bids dataset validation - :type nobidsvalidation : bool - :param type : raw | derivative. Type of the dataset - :type type : string - :param nocopy : disable bids dataset copy in assembly_bids - :type nocopy : bool - :param db : db object - :type db : object - - """ - - # grep config settings from the Config module - config_obj = Config(db, verbose) - default_bids_vl = config_obj.get_config('default_bids_vl') - - # Validate that pscid and candid matches - if idsvalidation: - validateids(bids_dir, db, verbose) - - # load the BIDS directory - if nobidsvalidation: - bids_reader = BidsReader(bids_dir, verbose, False) - else: - bids_reader = BidsReader(bids_dir, verbose) - if not bids_reader.participants_info \ - or not bids_reader.cand_sessions_list \ - or not bids_reader.cand_session_modalities_list: - message = '\n\tERROR: could not properly parse the following' \ - 'BIDS directory:' + bids_dir + '\n' - print(message) - sys.exit(lib.exitcode.UNREADABLE_FILE) - - loris_bids_root_dir = None - if not nocopy: - # create the LORIS_BIDS directory in data_dir based on Name and BIDS version - loris_bids_root_dir = create_loris_bids_directory( - bids_reader, data_dir, verbose - ) - - # Assumption all same project (for project-wide tags) - single_project_id = None - - # loop through subjects - for bids_subject_info in bids_reader.participants_info: - - # greps BIDS information for the candidate - bids_id = 
bids_subject_info['participant_id'] - bids_sessions = bids_reader.cand_sessions_list[bids_id] - - # greps BIDS candidate's info from LORIS (creates the candidate if it - # does not exist yet in LORIS and the createcand flag is set to true) - loris_cand_info = grep_or_create_candidate_db_info( - bids_reader, bids_id, db, createcand, verbose - ) - - if not nocopy: - # create the candidate's directory in the LORIS BIDS import directory - lib.utilities.create_dir(loris_bids_root_dir + "sub-" + bids_id, verbose) - - cand_id = loris_cand_info['CandID'] - center_id = loris_cand_info['RegistrationCenterID'] - project_id = loris_cand_info['RegistrationProjectID'] - single_project_id = project_id - - cohort_id = None - # TODO: change subproject -> cohort in participants.tsv? - if 'subproject' in bids_subject_info: - # TODO: change subproject -> cohort in participants.tsv? - cohort = bids_subject_info['subproject'] - cohort_info = db.pselect( - "SELECT CohortID FROM cohort WHERE title = %s", - [cohort, ] - ) - if len(cohort_info) > 0: - cohort_id = cohort_info[0]['CohortID'] - - # greps BIDS session's info for the candidate from LORIS (creates the - # session if it does not exist yet in LORIS and the createvisit is set - # to true. 
If no visit in BIDS structure, then use default visit_label - # stored in the Config module) - grep_candidate_sessions_info( - bids_sessions, bids_id, cand_id, loris_bids_root_dir, - createvisit, verbose, db, default_bids_vl, - center_id, project_id, cohort_id, nocopy - ) - - # Import root-level (dataset-wide) events.json - # Assumption: Single project for project-wide tags - bids_layout = bids_reader.bids_layout - root_event_metadata_file = bids_layout.get_nearest( - bids_dir, - return_type='tuple', - strict=False, - extension='json', - suffix='events', - all_=False, - subject=None, - session=None - ) - - dataset_tag_dict = {} - if not root_event_metadata_file: - message = '\nWARNING: no events metadata files (events.json) in ' \ - 'root directory' - print(message) - else: - # copy the event file to the LORIS BIDS import directory - copy_file = str.replace( - root_event_metadata_file.path, - bids_layout.root, - "" - ).lstrip('/') - - if not nocopy: - event_metadata_path = loris_bids_root_dir + copy_file - lib.utilities.copy_file(root_event_metadata_file.path, event_metadata_path, verbose) - - # TODO: Move - hed_query = 'SELECT * FROM hed_schema_nodes WHERE 1' - hed_union = db.pselect(query=hed_query, args=()) - - # load json data - with open(root_event_metadata_file.path) as metadata_file: - event_metadata = json.load(metadata_file) - blake2 = compute_file_blake2b_hash(root_event_metadata_file.path) - physio = lib.physiological.Physiological(db, verbose) - _, dataset_tag_dict = physio.insert_event_metadata( - event_metadata=event_metadata, - event_metadata_file=event_metadata_path, - physiological_file_id=None, - project_id=single_project_id, - blake2=blake2, - project_wide=True, - hed_union=hed_union - ) - - # read list of modalities per session / candidate and register data - for row in bids_reader.cand_session_modalities_list: - bids_session = row['bids_ses_id'] - visit_label = bids_session if bids_session else default_bids_vl - loris_bids_visit_rel_dir = 
'sub-' + row['bids_sub_id'] + '/' + 'ses-' + visit_label - - for modality in row['modalities']: - loris_bids_modality_rel_dir = loris_bids_visit_rel_dir + '/' + modality + '/' - if not nocopy: - lib.utilities.create_dir(loris_bids_root_dir + loris_bids_modality_rel_dir, verbose) - - if modality == 'eeg' or modality == 'ieeg': - Eeg( - bids_reader = bids_reader, - bids_sub_id = row['bids_sub_id'], - bids_ses_id = row['bids_ses_id'], - bids_modality = modality, - db = db, - verbose = verbose, - data_dir = data_dir, - default_visit_label = default_bids_vl, - loris_bids_eeg_rel_dir = loris_bids_modality_rel_dir, - loris_bids_root_dir = loris_bids_root_dir, - dataset_tag_dict = dataset_tag_dict, - dataset_type = type - ) - - elif modality in ['anat', 'dwi', 'fmap', 'func']: - Mri( - bids_reader = bids_reader, - bids_sub_id = row['bids_sub_id'], - bids_ses_id = row['bids_ses_id'], - bids_modality = modality, - db = db, - verbose = verbose, - data_dir = data_dir, - default_visit_label = default_bids_vl, - loris_bids_mri_rel_dir = loris_bids_modality_rel_dir, - loris_bids_root_dir = loris_bids_root_dir - ) - - # disconnect from the database - db.disconnect() - - -def validateids(bids_dir, db, verbose): - """ - Validate that pscid and candid matches - - :param bids_dir : path to the BIDS directory - :type bids_dir : str - :param db : database handler object - :type db : object - :param verbose : flag for more printing if set - :type verbose : bool - """ - - bids_folder = bids_dir.rstrip('/').split('/')[-1] - bids_folder_parts = bids_folder.split('_') - psc_id = bids_folder_parts[0] - cand_id = bids_folder_parts[1] - - candidate = Candidate(verbose, cand_id=cand_id) - loris_cand_info = candidate.get_candidate_info_from_loris(db) - - if not loris_cand_info: - print("ERROR: could not find a candidate with cand_id " + cand_id + ".") - sys.exit(lib.exitcode.CANDID_NOT_FOUND) - if loris_cand_info['PSCID'] != psc_id: - print("ERROR: cand_id " + cand_id + " and psc_id " + psc_id + 
" do not match.") - sys.exit(lib.exitcode.CANDIDATE_MISMATCH) - - -def create_loris_bids_directory(bids_reader, data_dir, verbose): - """ - Creates the LORIS BIDS import root directory (with name and BIDS version) - and copy over the dataset_description.json, README and participants.tsv - files. - - :param bids_reader: BIDS information handler object - :type bids_reader: object - :param data_dir : path of the LORIS data directory - :type data_dir : str - :param verbose : if true, prints out information while executing - :type verbose : bool - - :return: path to the LORIS BIDS import root directory - :rtype: str - """ - - # making sure that there is a final / in bids_dir - bids_dir = bids_reader.bids_dir - bids_dir = bids_dir if bids_dir.endswith('/') else bids_dir + "/" - - # determine the root directory of the LORIS BIDS and create it if does not exist - name = re.sub(r"[^0-9a-zA-Z]+", "_", bids_reader.dataset_name) # get name of the dataset - version = re.sub(r"[^0-9a-zA-Z\.]+", "_", bids_reader.bids_version) # get BIDSVersion of the dataset - - # the LORIS BIDS directory will be in data_dir/BIDS/ and named with the - # concatenation of the dataset name and the BIDS version - loris_bids_dirname = lib.utilities.create_dir( - data_dir + "bids_imports/" + name + "_BIDSVersion_" + version + "/", - verbose - ) - - # copy the dataset JSON file to the new directory - lib.utilities.copy_file( - bids_dir + "dataset_description.json", - loris_bids_dirname + "dataset_description.json", - verbose - ) - - # copy the README file to the new directory - if os.path.isfile(bids_dir + "README"): - lib.utilities.copy_file( - bids_dir + "README", - loris_bids_dirname + "README", - verbose - ) - - # copy the participant.tsv file to the new directory - if os.path.exists(loris_bids_dirname + "participants.tsv"): - lib.utilities.append_to_tsv_file( - bids_dir + "participants.tsv", - loris_bids_dirname + "participants.tsv", - "participant_id", - verbose - ) - else: - 
lib.utilities.copy_file( - bids_dir + "participants.tsv", - loris_bids_dirname + "participants.tsv", - verbose - ) - - return loris_bids_dirname - - -def grep_or_create_candidate_db_info(bids_reader, bids_id, db, createcand, verbose): - """ - Greps (or creates if candidate does not exist and createcand is true) the - BIDS candidate in the LORIS candidate's table and return a list of - candidates with their related fields from the database. - - :param bids_reader : BIDS information handler object - :type bids_reader : object - :param bids_id : bids_id to be used (CandID or PSCID) - :type bids_id : str - :param db : database handler object - :type db : object - :param createcand : if true, creates the candidate in LORIS - :type createcand : bool - :param verbose : if true, prints out information while executing - :type verbose : bool - - :return: list of candidate's dictionaries. One entry in the list holds - a dictionary with field's values from the candidate table - :rtype: list - """ - - candidate = Candidate(verbose=verbose, cand_id=bids_id) - loris_cand_info = candidate.get_candidate_info_from_loris(db) - - if not loris_cand_info: - candidate = Candidate(verbose, psc_id=bids_id) - loris_cand_info = candidate.get_candidate_info_from_loris(db) - - if not loris_cand_info and createcand: - loris_cand_info = candidate.create_candidate( - db, bids_reader.participants_info - ) - if not loris_cand_info: - print("Creating candidate failed. Cannot importing the files.\n") - sys.exit(lib.exitcode.CANDIDATE_CREATION_FAILURE) - - if not loris_cand_info: - print("Candidate " + bids_id + " not found. 
You can retry with the --createcandidate option.\n") - sys.exit(lib.exitcode.CANDIDATE_NOT_FOUND) - - return loris_cand_info - - -def grep_or_create_session_db_info( - bids_id, cand_id, visit_label, - db, createvisit, verbose, loris_bids_dir, - center_id, project_id, cohort_id, nocopy): - """ - Greps (or creates if session does not exist and createvisit is true) the - BIDS session in the LORIS session's table and return a list of - sessions with their related fields from the database. - - :parma bids_id : BIDS ID of the session - :type bids_id : str - :param cand_id : CandID to use to create the session - :type cand_id : int - :param visit_label : Visit label to use to create the session - :type visit_label : str - :param db : database handler object - :type db : object - :param createvisit : if true, creates the session in LORIS - :type createvisit : bool - :param verbose : if true, prints out information while executing - :type verbose : bool - :param loris_bids_dir: LORIS BIDS import root directory to copy data - :type loris_bids_dir: str - :param center_id : CenterID to use to create the session - :type center_id : int - :param project_id : ProjectID to use to create the session - :type project_id : int - :param cohort_id : CohortID to use to create the session - :type cohort_id : int - :param nocopy : if true, skip the assembly_bids dataset copy - :type nocopy : bool - - :return: session information grepped from LORIS for cand_id and visit_label - :rtype: dict - """ - - session = Session(db, verbose, cand_id, visit_label, center_id, project_id, cohort_id) - loris_vl_info = session.get_session_info_from_loris() - - if not loris_vl_info and createvisit: - loris_vl_info = session.create_session() - - if not nocopy: - # create the visit directory for in the candidate folder of the LORIS - # BIDS import directory - lib.utilities.create_dir( - loris_bids_dir + "sub-" + bids_id + "/ses-" + visit_label, - verbose - ) - - return loris_vl_info - - -def 
grep_candidate_sessions_info(bids_ses, bids_id, cand_id, loris_bids_dir, - createvisit, verbose, db, default_vl, - center_id, project_id, cohort_id, nocopy): - """ - Greps all session info dictionaries for a given candidate and aggregates - them into a list, with one entry per session. If the session does not - exist in LORIS and that createvisit is true, it will create the session - first. - - :param bids_ses : list of BIDS sessions to grep info or insert - :type bids_ses : list - :param bids_id : BIDS ID of the candidate - :type bids_id : str - :param cand_id : candidate's CandID - :type cand_id : int - :param loris_bids_dir: LORIS BIDS import root directory to copy data - :type loris_bids_dir: str - :param createvisit : if true, creates the visits in LORIS - :type createvisit : bool - :param verbose : if true, prints out information while executing - :type verbose : bool - :param db : database handler object - :type db : object - :param default_vl : default visit label from the Config module - :type default_vl : str - :param center_id : center ID associated to the candidate and visit - :type center_id : int - :param project_id : project ID associated to the candidate and visit - :type project_id : int - :param cohort_id : cohort ID associated to the candidate and visit - :type cohort_id : int - :param nocopy : if true, skip the assembly_bids dataset copy - :type nocopy : bool - - - - :return: list of all session's dictionaries for a given candidate - :rtype: list - """ - - loris_sessions_info = [] - - if not bids_ses: - loris_ses_info = grep_or_create_session_db_info( - bids_id, cand_id, default_vl, db, - createvisit, verbose, loris_bids_dir, - center_id, project_id, cohort_id, nocopy - ) - loris_sessions_info.append(loris_ses_info) - else: - for visit_label in bids_ses: - loris_ses_info = grep_or_create_session_db_info( - bids_id, cand_id, visit_label, db, - createvisit, verbose, loris_bids_dir, - center_id, project_id, cohort_id, nocopy - ) - 
loris_sessions_info.append(loris_ses_info) - - return loris_sessions_info - - -if __name__ == "__main__": - main() diff --git a/python/scripts/import_bids_dataset.py b/python/scripts/import_bids_dataset.py new file mode 100755 index 000000000..0b81e8b13 --- /dev/null +++ b/python/scripts/import_bids_dataset.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python + +"""Script to import BIDS structure into LORIS.""" + +import os +from typing import Any + +import lib.exitcode +from lib.import_bids_dataset.args import Args +from lib.import_bids_dataset.main import import_bids_dataset +from lib.logging import log_error_exit +from lib.lorisgetopt import LorisGetOpt +from lib.make_env import make_env + + +def pack_args(options_dict: dict[str, Any]) -> Args: + return Args( + source_bids_path = os.path.normpath(options_dict['directory']['value']), + type = options_dict['type']['value'], + bids_validation = not options_dict['nobidsvalidation']['value'], + create_candidate = options_dict['createcandidate']['value'], + create_session = options_dict['createsession']['value'], + copy = not options_dict['nocopy']['value'], + verbose = options_dict['verbose']['value'], + ) + + +# to limit the traceback when raising exceptions. 
+# sys.tracebacklimit = 0 + +def main(): + usage = ( + "\n" + "usage : bids_import -d -p \n" + "\n" + "options: \n" + "\t-p, --profile : name of the python database config file in dicom-archive/.loris-mri\n" + "\t-d, --directory : BIDS directory to parse & insert into LORIS\n" + "\t If directory is within $data_dir/assembly_bids, no copy will be performed\n" + "\t-c, --createcandidate : to create BIDS candidates in LORIS (optional)\n" + "\t-s, --createsession : to create BIDS sessions in LORIS (optional)\n" + "\t-i, --idsvalidation : to validate BIDS directory for a matching pscid/candid pair (optional)\n" + "\t-b, --nobidsvalidation : to disable BIDS validation for BIDS compliance\n" + "\t-a, --nocopy : to disable dataset copy in data assembly_bids\n" + "\t-t, --type : raw | derivative. Specify the dataset type.\n" + "\t If not set, the pipeline will look for both raw and derivative files.\n" + "\t Required if no dataset_description.json is found.\n" + "\t-v, --verbose : be verbose\n" + ) + + options_dict = { + "profile": { + "value": None, "required": True, "expect_arg": True, "short_opt": "p", "is_path": False + }, + "directory": { + "value": None, "required": True, "expect_arg": True, "short_opt": "d", "is_path": True + }, + "createcandidate": { + "value": False, "required": False, "expect_arg": False, "short_opt": "cc", "is_path": False + }, + "createsession": { + "value": False, "required": False, "expect_arg": False, "short_opt": "cc", "is_path": False + }, + "nobidsvalidation": { + "value": False, "required": False, "expect_arg": False, "short_opt": "nv", "is_path": False + }, + "nocopy": { + "value": False, "required": False, "expect_arg": False, "short_opt": "nc", "is_path": False + }, + "type": { + "value": None, "required": False, "expect_arg": True, "short_opt": "t", "is_path": False + }, + "verbose": { + "value": False, "required": False, "expect_arg": False, "short_opt": "v", "is_path": False + }, + "help": { + "value": False, "required": False, 
"expect_arg": False, "short_opt": "h", "is_path": False + }, + } + + # Get the CLI arguments and initiate the environment. + + loris_getopt_obj = LorisGetOpt(usage, options_dict, os.path.basename(__file__[:-3])) + + env = make_env(loris_getopt_obj) + + # Check the CLI arguments. + + type = loris_getopt_obj.options_dict['type']['value'] + if type not in (None, 'raw', 'derivative'): + log_error_exit( + env, + f"--type must be one of 'raw', 'derivative'\n{usage}", + lib.exitcode.MISSING_ARG, + ) + + args = pack_args(loris_getopt_obj.options_dict) + + # read and insert BIDS data + import_bids_dataset( + env, + args, + loris_getopt_obj.db, + ) + + print("Success !") + + +if __name__ == '__main__': + main() From 1239f5300d3c7add24aa475b49c026584a4c8e0a Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Tue, 4 Nov 2025 04:58:35 +0000 Subject: [PATCH 02/12] fix eeg path concatenation --- python/lib/eeg.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/lib/eeg.py b/python/lib/eeg.py index f52b87f92..fa796cf1e 100644 --- a/python/lib/eeg.py +++ b/python/lib/eeg.py @@ -706,7 +706,8 @@ def copy_file_to_loris_bids_dir(self, file, derivatives=False, inheritance=False self.bids_layout.root, "" ) - copy_file = self.loris_bids_root_dir + copy_file + + copy_file = os.path.join(self.loris_bids_root_dir, copy_file) # create derivative directories lib.utilities.create_dir( @@ -728,7 +729,8 @@ def copy_file_to_loris_bids_dir(self, file, derivatives=False, inheritance=False "sub-" + self.data_type.subject.label, "sub-" + self.data_type.subject.label + "_ses-" + self.default_vl ) - copy_file = self.loris_bids_root_dir + copy_file + + copy_file = os.path.join(self.loris_bids_root_dir, copy_file) # copy the file utilities.copy_file(file, copy_file, self.verbose) From 6448a6cef883aac5cf2a34c62de6884404db88ba Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Thu, 6 Nov 2025 04:55:05 +0000 Subject: [PATCH 03/12] fix cli options --- 
python/scripts/import_bids_dataset.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/python/scripts/import_bids_dataset.py b/python/scripts/import_bids_dataset.py index 0b81e8b13..6ce1283df 100755 --- a/python/scripts/import_bids_dataset.py +++ b/python/scripts/import_bids_dataset.py @@ -39,7 +39,6 @@ def main(): "\t If directory is within $data_dir/assembly_bids, no copy will be performed\n" "\t-c, --createcandidate : to create BIDS candidates in LORIS (optional)\n" "\t-s, --createsession : to create BIDS sessions in LORIS (optional)\n" - "\t-i, --idsvalidation : to validate BIDS directory for a matching pscid/candid pair (optional)\n" "\t-b, --nobidsvalidation : to disable BIDS validation for BIDS compliance\n" "\t-a, --nocopy : to disable dataset copy in data assembly_bids\n" "\t-t, --type : raw | derivative. Specify the dataset type.\n" @@ -56,16 +55,16 @@ def main(): "value": None, "required": True, "expect_arg": True, "short_opt": "d", "is_path": True }, "createcandidate": { - "value": False, "required": False, "expect_arg": False, "short_opt": "cc", "is_path": False + "value": False, "required": False, "expect_arg": False, "short_opt": "c", "is_path": False }, "createsession": { - "value": False, "required": False, "expect_arg": False, "short_opt": "cc", "is_path": False + "value": False, "required": False, "expect_arg": False, "short_opt": "s", "is_path": False }, "nobidsvalidation": { - "value": False, "required": False, "expect_arg": False, "short_opt": "nv", "is_path": False + "value": False, "required": False, "expect_arg": False, "short_opt": "b", "is_path": False }, "nocopy": { - "value": False, "required": False, "expect_arg": False, "short_opt": "nc", "is_path": False + "value": False, "required": False, "expect_arg": False, "short_opt": "a", "is_path": False }, "type": { "value": None, "required": False, "expect_arg": True, "short_opt": "t", "is_path": False From d16441e1dddb29c5ef49851e1cda428514a88adb Mon Sep 17 
00:00:00 2001 From: Maxime Mulder Date: Thu, 6 Nov 2025 05:50:32 +0000 Subject: [PATCH 04/12] make profile cli argument optional --- python/scripts/import_bids_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/scripts/import_bids_dataset.py b/python/scripts/import_bids_dataset.py index 6ce1283df..7267f1608 100755 --- a/python/scripts/import_bids_dataset.py +++ b/python/scripts/import_bids_dataset.py @@ -49,7 +49,7 @@ def main(): options_dict = { "profile": { - "value": None, "required": True, "expect_arg": True, "short_opt": "p", "is_path": False + "value": None, "required": False, "expect_arg": True, "short_opt": "p", "is_path": False }, "directory": { "value": None, "required": True, "expect_arg": True, "short_opt": "d", "is_path": True From 608f36ba879da19df1c22d663cf29db371b79cbe Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Thu, 6 Nov 2025 06:57:00 +0000 Subject: [PATCH 05/12] add missing candidate and session fields --- .../check_subjects_sessions.py | 27 ++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/python/lib/import_bids_dataset/check_subjects_sessions.py b/python/lib/import_bids_dataset/check_subjects_sessions.py index fb2e477ac..fabedb344 100644 --- a/python/lib/import_bids_dataset/check_subjects_sessions.py +++ b/python/lib/import_bids_dataset/check_subjects_sessions.py @@ -210,6 +210,8 @@ def create_bids_candidate(env: Env, tsv_participant: BidsTsvParticipant) -> DbCa ) ) + now = datetime.now() + candidate = DbCandidate( cand_id = cand_id, psc_id = psc_id, @@ -217,6 +219,11 @@ def create_bids_candidate(env: Env, tsv_participant: BidsTsvParticipant) -> DbCa sex = sex, registration_site_id = site.id, registration_project_id = project.id, + user_id = 'imaging.py', + entity_type = 'Human', + date_active = now, + date_registered = now, + active = True, ) env.db.add(candidate) @@ -270,12 +277,20 @@ def create_bids_session(env: Env, candidate: DbCandidate, cohort: DbCohort | Non ) 
session = DbSession( - candidate_id = candidate.id, - visit_label = visit_label, - current_stage = 'Not Started', - site_id = candidate.registration_site_id, - project_id = candidate.registration_project_id, - cohort_id = cohort.id, + candidate_id = candidate.id, + visit_label = visit_label, + current_stage = 'Not Started', + site_id = candidate.registration_site_id, + project_id = candidate.registration_project_id, + cohort_id = cohort.id, + scan_done = True, + submitted = False, + active = True, + user_id = '', + hardcopy_request = '-', + mri_qc_status = '', + mri_qc_pending = False, + mri_caveat = True, ) env.db.add(session) From 55d4303a38063fb9bcd587327f9034dc7a5c7d5d Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Thu, 6 Nov 2025 08:50:37 +0000 Subject: [PATCH 06/12] fix print statement --- python/lib/import_bids_dataset/check_subjects_sessions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/lib/import_bids_dataset/check_subjects_sessions.py b/python/lib/import_bids_dataset/check_subjects_sessions.py index fabedb344..6485472f7 100644 --- a/python/lib/import_bids_dataset/check_subjects_sessions.py +++ b/python/lib/import_bids_dataset/check_subjects_sessions.py @@ -271,7 +271,7 @@ def create_bids_session(env: Env, candidate: DbCandidate, cohort: DbCohort | Non env, ( "Creating session with:\n" - f" PSCID = {candidate.cand_id}\n" + f" PSCID = {candidate.psc_id}\n" f" Visit label = {visit_label}" ) ) From 2af7b5babcebc271e1eab2df369cb3d1e8b9e8eb Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Sun, 16 Nov 2025 06:03:28 +0000 Subject: [PATCH 07/12] use lists instead of tuples for eeg paths --- python/lib/eeg.py | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/python/lib/eeg.py b/python/lib/eeg.py index fa796cf1e..1531a2053 100644 --- a/python/lib/eeg.py +++ b/python/lib/eeg.py @@ -172,21 +172,21 @@ def register_data(self, derivatives=False, detect=True): ) # archive all files in a 
tar ball for downloading all files at once - files_to_archive = (os.path.join(self.data_dir, eeg_file_path),) + files_to_archive: list[str] = [os.path.join(self.data_dir, eeg_file_path)] if eegjson_file_path: - files_to_archive = (*files_to_archive, os.path.join(self.data_dir, eegjson_file_path)) + files_to_archive.append(os.path.join(self.data_dir, eegjson_file_path)) if fdt_file_path: - files_to_archive = (*files_to_archive, os.path.join(self.data_dir, fdt_file_path)) + files_to_archive.append(os.path.join(self.data_dir, fdt_file_path)) if electrode_file_path: - files_to_archive = (*files_to_archive, os.path.join(self.data_dir, electrode_file_path)) + files_to_archive.append(os.path.join(self.data_dir, electrode_file_path)) if event_file_paths: # archive all event files in a tar ball for event download - event_files_to_archive = () + event_files_to_archive: list[str] = [] for event_file_path in event_file_paths: - files_to_archive = (*files_to_archive, os.path.join(self.data_dir, event_file_path)) - event_files_to_archive = (*event_files_to_archive, os.path.join(self.data_dir, event_file_path)) + files_to_archive.append(os.path.join(self.data_dir, event_file_path)) + event_files_to_archive.append(os.path.join(self.data_dir, event_file_path)) event_archive_rel_name = os.path.splitext(event_file_paths[0])[0] + ".tgz" self.create_and_insert_event_archive( @@ -194,7 +194,7 @@ def register_data(self, derivatives=False, detect=True): ) if channel_file_path: - files_to_archive = (*files_to_archive, os.path.join(self.data_dir, channel_file_path)) + files_to_archive.append(os.path.join(self.data_dir, channel_file_path)) archive_rel_name = os.path.splitext(eeg_file_path)[0] + ".tgz" self.create_and_insert_archive( @@ -740,17 +740,13 @@ def copy_file_to_loris_bids_dir(self, file, derivatives=False, inheritance=False return relative_path - def create_and_insert_archive(self, files_to_archive, archive_rel_name, - eeg_file_id): + def create_and_insert_archive(self, 
files_to_archive: list[str], archive_rel_name: str, eeg_file_id): """ Create an archive with all electrophysiology files associated to a specific recording (including electrodes.tsv, channels.tsv etc...) - :param files_to_archive: tuple with the list of files to include in - the archive - :type files_to_archive: tuple + :param files_to_archive: list of files to include in the archive :param archive_rel_name: path to the archive relative to data_dir - :type archive_rel_name: str :param eeg_file_id : PhysiologicalFileID :type eeg_file_id : int """ @@ -798,15 +794,12 @@ def create_and_insert_archive(self, files_to_archive, archive_rel_name, } physiological.insert_archive_file(archive_info) - def create_and_insert_event_archive(self, files_to_archive, archive_rel_name, eeg_file_id): + def create_and_insert_event_archive(self, files_to_archive: list[str], archive_rel_name: str, eeg_file_id): """ Create an archive with all event files associated to a specific recording - :param files_to_archive: tuple with the list of files to include in - the archive - :type files_to_archive: tuple + :param files_to_archive: list of files to include in the archive :param archive_rel_name: path to the archive relative to data_dir - :type archive_rel_name: str :param eeg_file_id : PhysiologicalFileID :type eeg_file_id : int """ From 7decb24dc953dca6344e032a34f2242ef1aea0b2 Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Sun, 16 Nov 2025 07:00:09 +0000 Subject: [PATCH 08/12] rename bids classes --- python/lib/eeg.py | 4 +- python/lib/imaging_lib/bids/dataset.py | 80 +++++++++---------- .../check_subjects_sessions.py | 8 +- python/lib/import_bids_dataset/env.py | 2 +- python/lib/import_bids_dataset/events.py | 4 +- python/lib/import_bids_dataset/main.py | 26 +++--- python/lib/import_bids_dataset/mri.py | 12 +-- python/lib/import_bids_dataset/print.py | 4 +- 8 files changed, 70 insertions(+), 70 deletions(-) diff --git a/python/lib/eeg.py b/python/lib/eeg.py index 1531a2053..ccdb3e313 
100644 --- a/python/lib/eeg.py +++ b/python/lib/eeg.py @@ -14,7 +14,7 @@ from lib.database_lib.physiological_modality import PhysiologicalModality from lib.database_lib.physiological_output_type import PhysiologicalOutputType from lib.db.models.session import DbSession -from lib.imaging_lib.bids.dataset import BidsDataType +from lib.imaging_lib.bids.dataset import BIDSDataType from lib.physiological import Physiological from lib.util.crypto import compute_file_blake2b_hash @@ -26,7 +26,7 @@ class Eeg: """ def __init__( - self, data_type: BidsDataType, session: DbSession, db: Database, verbose: bool, data_dir: str, + self, data_type: BIDSDataType, session: DbSession, db: Database, verbose: bool, data_dir: str, loris_bids_eeg_rel_dir: str, loris_bids_root_dir: str | None, dataset_tag_dict: dict[Any, Any], dataset_type: Literal['raw', 'derivative'] | None, ): diff --git a/python/lib/imaging_lib/bids/dataset.py b/python/lib/imaging_lib/bids/dataset.py index 37664ed1f..1689c45b8 100644 --- a/python/lib/imaging_lib/bids/dataset.py +++ b/python/lib/imaging_lib/bids/dataset.py @@ -17,7 +17,7 @@ PYBIDS_FORCE = [re.compile(r"_annotations\.(tsv|json)$")] -class BidsDataset: +class BIDSDataset: path: str validate: bool @@ -26,27 +26,27 @@ def __init__(self, bids_path: str, validate: bool): self.validate = validate @property - def sessions(self) -> Iterator['BidsSession']: + def sessions(self) -> Iterator['BIDSSession']: for subject in self.subjects: yield from subject.sessions @property - def data_types(self) -> Iterator['BidsDataType']: + def data_types(self) -> Iterator['BIDSDataType']: for session in self.sessions: yield from session.data_types @property - def niftis(self) -> Iterator['BidsNifti']: + def niftis(self) -> Iterator['BIDSNifti']: for data_type in self.data_types: yield from data_type.niftis @cached_property - def subjects(self) -> list['BidsSubject']: + def subjects(self) -> list['BIDSSubject']: """ The subject directories found in the BIDS dataset. 
""" - subjects: list[BidsSubject] = [] + subjects: list[BIDSSubject] = [] for file in os.scandir(self.path): subject_match = re.match(r'sub-([a-zA-Z0-9]+)', file.name) @@ -57,7 +57,7 @@ def subjects(self) -> list['BidsSubject']: continue subject_label = subject_match.group(1) - subjects.append(BidsSubject(self, subject_label)) + subjects.append(BIDSSubject(self, subject_label)) return subjects @@ -107,7 +107,7 @@ def session_labels(self) -> list[str]: session_labels.sort() return session_labels - def get_subject(self, subject_label: str) -> 'BidsSubject | None': + def get_subject(self, subject_label: str) -> 'BIDSSubject | None': """ Get the subject directory corresponding to a subject label in this BIDS dataset or `None` if it does not exist. @@ -141,33 +141,33 @@ def layout(self) -> BIDSLayout: ) -class BidsSubject: - root_dataset: BidsDataset +class BIDSSubject: + root_dataset: BIDSDataset label: str path: str - def __init__(self, root_dataset: BidsDataset, label: str): + def __init__(self, root_dataset: BIDSDataset, label: str): self.root_dataset = root_dataset self.label = label self.path = os.path.join(self.root_dataset.path, f'sub-{self.label}') @property - def data_types(self) -> Iterator['BidsDataType']: + def data_types(self) -> Iterator['BIDSDataType']: for session in self.sessions: yield from session.data_types @property - def niftis(self) -> Iterator['BidsNifti']: + def niftis(self) -> Iterator['BIDSNifti']: for data_type in self.data_types: yield from data_type.niftis @cached_property - def sessions(self) -> list['BidsSession']: + def sessions(self) -> list['BIDSSession']: """ The session directories found in this subject directory. 
""" - sessions: list[BidsSession] = [] + sessions: list[BIDSSession] = [] for file in os.scandir(self.path): if not os.path.isdir(file): @@ -178,14 +178,14 @@ def sessions(self) -> list['BidsSession']: continue session_label = session_match.group(1) - sessions.append(BidsSession(self, session_label)) + sessions.append(BIDSSession(self, session_label)) if sessions == []: - sessions.append(BidsSession(self, None)) + sessions.append(BIDSSession(self, None)) return sessions - def get_session(self, session_label: str) -> 'BidsSession | None': + def get_session(self, session_label: str) -> 'BIDSSession | None': """ Get a session directory of this subject directory or `None` if it does not exist. """ @@ -193,13 +193,13 @@ def get_session(self, session_label: str) -> 'BidsSession | None': return find(lambda session: session.label == session_label, self.sessions) -class BidsSession: - subject: BidsSubject +class BIDSSession: + subject: BIDSSubject label: str | None path: str tsv_scans_path: str | None - def __init__(self, subject: BidsSubject, label: str | None): + def __init__(self, subject: BIDSSubject, label: str | None): self.subject = subject self.label = label if label is None: @@ -214,27 +214,27 @@ def __init__(self, subject: BidsSubject, label: str | None): self.tsv_scans_path = None @property - def root_dataset(self) -> BidsDataset: + def root_dataset(self) -> BIDSDataset: return self.subject.root_dataset @property - def niftis(self) -> Iterator['BidsNifti']: + def niftis(self) -> Iterator['BIDSNifti']: for data_type in self.data_types: yield from data_type.niftis @cached_property - def data_types(self) -> list['BidsDataType']: + def data_types(self) -> list['BIDSDataType']: """ The data type directories found in this session directory. 
""" - data_types: list[BidsDataType] = [] + data_types: list[BIDSDataType] = [] for file in os.scandir(self.path): if not os.path.isdir(file): continue - data_types.append(BidsDataType(self, file.name)) + data_types.append(BIDSDataType(self, file.name)) return data_types @@ -262,45 +262,45 @@ def get_tsv_scan(self, file_name: str) -> 'BidsTsvScan | None': return self.tsv_scans.get(file_name) -class BidsDataType: - session: BidsSession +class BIDSDataType: + session: BIDSSession name: str path: str - def __init__(self, session: BidsSession, name: str): + def __init__(self, session: BIDSSession, name: str): self.session = session self.name = name self.path = os.path.join(self.session.path, self.name) @property - def root_dataset(self) -> BidsDataset: + def root_dataset(self) -> BIDSDataset: return self.session.root_dataset @property - def subject(self) -> BidsSubject: + def subject(self) -> BIDSSubject: return self.session.subject @cached_property - def niftis(self) -> list['BidsNifti']: + def niftis(self) -> list['BIDSNifti']: """ The NIfTI files found in this data type directory. 
""" - niftis: list[BidsNifti] = [] + niftis: list[BIDSNifti] = [] for nifti_name in find_dir_nifti_names(self.path): - niftis.append(BidsNifti(self, nifti_name)) + niftis.append(BIDSNifti(self, nifti_name)) return niftis -class BidsNifti: - data_type: BidsDataType +class BIDSNifti: + data_type: BIDSDataType name: str path: str suffix: str | None - def __init__(self, data_type: BidsDataType, name: str): + def __init__(self, data_type: BIDSDataType, name: str): self.data_type = data_type self.path = os.path.join(self.data_type.path, name) self.name = name @@ -312,15 +312,15 @@ def __init__(self, data_type: BidsDataType, name: str): self.suffix = None @property - def root_dataset(self) -> BidsDataset: + def root_dataset(self) -> BIDSDataset: return self.data_type.root_dataset @property - def subject(self) -> BidsSubject: + def subject(self) -> BIDSSubject: return self.data_type.subject @property - def session(self) -> BidsSession: + def session(self) -> BIDSSession: return self.data_type.session def get_json_path(self) -> str | None: diff --git a/python/lib/import_bids_dataset/check_subjects_sessions.py b/python/lib/import_bids_dataset/check_subjects_sessions.py index 6485472f7..b56798971 100644 --- a/python/lib/import_bids_dataset/check_subjects_sessions.py +++ b/python/lib/import_bids_dataset/check_subjects_sessions.py @@ -18,7 +18,7 @@ from lib.db.queries.site import try_get_site_with_alias, try_get_site_with_name from lib.db.queries.visit import try_get_visit_with_visit_label from lib.env import Env -from lib.imaging_lib.bids.dataset import BidsDataset, BidsSubject +from lib.imaging_lib.bids.dataset import BIDSDataset, BIDSSubject from lib.imaging_lib.bids.tsv_participants import BidsTsvParticipant from lib.logging import log, log_error, log_error_exit @@ -34,7 +34,7 @@ def __init__(self, message: str): def check_bids_session_labels( env: Env, - bids: BidsDataset, + bids: BIDSDataset, ): """ Check that all the session labels in a BIDS dataset correspond to a LORIS 
visit, or exit the @@ -61,7 +61,7 @@ def check_bids_session_labels( def check_or_create_bids_subjects_and_sessions( env: Env, - bids: BidsDataset, + bids: BIDSDataset, create_candidate: bool, create_session: bool, ) -> int: @@ -110,7 +110,7 @@ def check_or_create_bids_subjects_and_sessions( def check_or_create_bids_subject_and_sessions( env: Env, - subject: BidsSubject, + subject: BIDSSubject, create_candidate: bool, create_session: bool, ) -> DbCandidate: diff --git a/python/lib/import_bids_dataset/env.py b/python/lib/import_bids_dataset/env.py index 8d376d4d5..1db2491f5 100644 --- a/python/lib/import_bids_dataset/env.py +++ b/python/lib/import_bids_dataset/env.py @@ -2,7 +2,7 @@ @dataclass -class BidsImportEnv: +class BIDSImportEnv: """ Pipeline-specific variables of the BIDS dataset import pipeline. """ diff --git a/python/lib/import_bids_dataset/events.py b/python/lib/import_bids_dataset/events.py index 29882a2fb..5d72d0d3d 100644 --- a/python/lib/import_bids_dataset/events.py +++ b/python/lib/import_bids_dataset/events.py @@ -5,7 +5,7 @@ import lib.utilities from lib.database import Database from lib.env import Env -from lib.imaging_lib.bids.dataset import BidsDataset +from lib.imaging_lib.bids.dataset import BIDSDataset from lib.import_bids_dataset.args import Args from lib.logging import log_warning from lib.physiological import Physiological @@ -15,7 +15,7 @@ def get_events_metadata( env: Env, args: Args, - bids: BidsDataset, + bids: BIDSDataset, legacy_db: Database, loris_bids_path: str | None, project_id: int, diff --git a/python/lib/import_bids_dataset/main.py b/python/lib/import_bids_dataset/main.py index 2c1a601ff..0d57236d7 100644 --- a/python/lib/import_bids_dataset/main.py +++ b/python/lib/import_bids_dataset/main.py @@ -10,7 +10,7 @@ from lib.db.queries.session import try_get_session_with_cand_id_visit_label from lib.eeg import Eeg from lib.env import Env -from lib.imaging_lib.bids.dataset import BidsDataset, BidsDataType, BidsSession +from 
lib.imaging_lib.bids.dataset import BIDSDataset, BIDSDataType, BIDSSession from lib.imaging_lib.bids.dataset_description import BidsDatasetDescriptionError from lib.imaging_lib.bids.tsv_participants import ( BidsTsvParticipant, @@ -29,7 +29,7 @@ check_bids_session_labels, check_or_create_bids_subjects_and_sessions, ) -from lib.import_bids_dataset.env import BidsImportEnv +from lib.import_bids_dataset.env import BIDSImportEnv from lib.import_bids_dataset.events import get_events_metadata from lib.import_bids_dataset.mri import import_bids_nifti from lib.import_bids_dataset.print import print_bids_import_summary @@ -50,7 +50,7 @@ def import_bids_dataset(env: Env, args: Args, legacy_db: Database): log(env, "Parsing BIDS dataset...") - bids = BidsDataset(args.source_bids_path, args.bids_validation) + bids = BIDSDataset(args.source_bids_path, args.bids_validation) niftis_count = count(bids.niftis) @@ -90,7 +90,7 @@ def import_bids_dataset(env: Env, args: Args, legacy_db: Database): # Process each session directory. 
- import_env = BidsImportEnv( + import_env = BIDSImportEnv( data_dir_path = data_dir_path, loris_bids_path = loris_bids_path, total_files_count = niftis_count, @@ -111,9 +111,9 @@ def import_bids_dataset(env: Env, args: Args, legacy_db: Database): def import_bids_session( env: Env, - import_env: BidsImportEnv, + import_env: BIDSImportEnv, args: Args, - bids_session: BidsSession, + bids_session: BIDSSession, events_metadata: dict[Any, Any], legacy_db: Database, ): @@ -165,10 +165,10 @@ def import_bids_session( def import_bids_data_type_files( env: Env, - import_env: BidsImportEnv, + import_env: BIDSImportEnv, args: Args, session: DbSession, - data_type: BidsDataType, + data_type: BIDSDataType, events_metadata: dict[Any, Any], legacy_db: Database, ): @@ -186,10 +186,10 @@ def import_bids_data_type_files( def import_bids_mri_data_type_files( env: Env, - import_env: BidsImportEnv, + import_env: BIDSImportEnv, args: Args, session: DbSession, - data_type: BidsDataType, + data_type: BIDSDataType, ): """ Read the BIDS MRI data type directory and import its files into LORIS. @@ -218,10 +218,10 @@ def import_bids_mri_data_type_files( def import_bids_eeg_data_type_files( env: Env, - import_env: BidsImportEnv, + import_env: BIDSImportEnv, args: Args, session: DbSession, - data_type: BidsDataType, + data_type: BIDSDataType, events_metadata: dict[Any, Any], legacy_db: Database, ): @@ -286,7 +286,7 @@ def copy_static_dataset_files(source_bids_path: str, loris_bids_path: str): shutil.copyfile(source_file_path, loris_file_path) -def get_loris_bids_path(env: Env, bids: BidsDataset, data_dir_path: str) -> str: +def get_loris_bids_path(env: Env, bids: BIDSDataset, data_dir_path: str) -> str: """ Get the LORIS BIDS directory path for the BIDS dataset to import, and create that directory if it does not exist yet. 
diff --git a/python/lib/import_bids_dataset/mri.py b/python/lib/import_bids_dataset/mri.py index 1538645a4..b5b9a7080 100644 --- a/python/lib/import_bids_dataset/mri.py +++ b/python/lib/import_bids_dataset/mri.py @@ -7,7 +7,7 @@ from lib.db.queries.file import try_get_file_with_hash, try_get_file_with_rel_path from lib.db.queries.mri_scan_type import try_get_mri_scan_type_with_name from lib.env import Env -from lib.imaging_lib.bids.dataset import BidsNifti +from lib.imaging_lib.bids.dataset import BIDSNifti from lib.imaging_lib.bids.json import add_bids_json_file_parameters from lib.imaging_lib.bids.tsv_scans import add_scan_tsv_file_parameters from lib.imaging_lib.bids.util import determine_bids_file_type @@ -16,7 +16,7 @@ from lib.imaging_lib.mri_scan_type import create_mri_scan_type from lib.imaging_lib.nifti import add_nifti_file_parameters from lib.imaging_lib.nifti_pic import create_imaging_pic -from lib.import_bids_dataset.env import BidsImportEnv +from lib.import_bids_dataset.env import BIDSImportEnv from lib.logging import log, log_warning from lib.util.crypto import compute_file_blake2b_hash from lib.util.fs import get_file_extension @@ -38,7 +38,7 @@ } -def import_bids_nifti(env: Env, import_env: BidsImportEnv, session: DbSession, nifti: BidsNifti): +def import_bids_nifti(env: Env, import_env: BIDSImportEnv, session: DbSession, nifti: BIDSNifti): """ Import a BIDS NIfTI file and its associated files in LORIS. """ @@ -158,7 +158,7 @@ def import_bids_nifti(env: Env, import_env: BidsImportEnv, session: DbSession, n import_env.imported_files_count += 1 -def get_check_nifti_imaging_file_type(env: Env, nifti: BidsNifti) -> str: +def get_check_nifti_imaging_file_type(env: Env, nifti: BIDSNifti) -> str: """ Get the BIDS file type of a NIfTI file and raise an exception if that file type is not registered in the database. 
@@ -171,7 +171,7 @@ def get_check_nifti_imaging_file_type(env: Env, nifti: BidsNifti) -> str: return file_type -def get_check_nifti_file_hash(env: Env, nifti: BidsNifti) -> str: +def get_check_nifti_file_hash(env: Env, nifti: BIDSNifti) -> str: """ Compute the BLAKE2b hash of a NIfTI file and raise an exception if that hash is already registered in the database. @@ -186,7 +186,7 @@ -def get_nifti_mri_scan_type(env: Env, import_env: BidsImportEnv, nifti: BidsNifti) -> DbMriScanType | None: +def get_nifti_mri_scan_type(env: Env, import_env: BIDSImportEnv, nifti: BIDSNifti) -> DbMriScanType | None: """ Get the MRI scan type corresponding to a NIfTI file using its BIDS suffix. Create the MRI scan type in the database if the suffix is a standard BIDS suffix and the scan type does not already diff --git a/python/lib/import_bids_dataset/print.py b/python/lib/import_bids_dataset/print.py index 8ae1fb936..0782a9522 100644 --- a/python/lib/import_bids_dataset/print.py +++ b/python/lib/import_bids_dataset/print.py @@ -1,9 +1,9 @@ from lib.env import Env -from lib.import_bids_dataset.env import BidsImportEnv +from lib.import_bids_dataset.env import BIDSImportEnv from lib.logging import log -def print_bids_import_summary(env: Env, import_env: BidsImportEnv): +def print_bids_import_summary(env: Env, import_env: BIDSImportEnv): """ Print a summary of this BIDS import process.
""" From 3a5292c78acb132590b9e7219a3322ed256a0496 Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Sun, 16 Nov 2025 07:43:42 +0000 Subject: [PATCH 09/12] migrate bids importer from str to path --- python/lib/imaging_lib/bids/dataset.py | 90 +++++++++---------- .../imaging_lib/bids/dataset_description.py | 3 +- python/lib/imaging_lib/bids/json.py | 5 +- .../lib/imaging_lib/bids/tsv_participants.py | 7 +- python/lib/imaging_lib/bids/tsv_scans.py | 9 +- python/lib/imaging_lib/nifti.py | 14 +-- python/lib/import_bids_dataset/env.py | 7 +- python/lib/import_bids_dataset/events.py | 3 +- python/lib/import_bids_dataset/main.py | 43 ++++----- python/lib/import_bids_dataset/mri.py | 44 ++++----- python/lib/util/crypto.py | 5 +- python/lib/util/fs.py | 13 +-- 12 files changed, 126 insertions(+), 117 deletions(-) diff --git a/python/lib/imaging_lib/bids/dataset.py b/python/lib/imaging_lib/bids/dataset.py index 1689c45b8..f1fbd3a29 100644 --- a/python/lib/imaging_lib/bids/dataset.py +++ b/python/lib/imaging_lib/bids/dataset.py @@ -1,14 +1,14 @@ -import os import re from collections.abc import Iterator from functools import cached_property +from pathlib import Path from bids import BIDSLayout from lib.imaging_lib.bids.dataset_description import BidsDatasetDescription from lib.imaging_lib.bids.tsv_participants import BidsTsvParticipant, read_bids_participants_tsv_file from lib.imaging_lib.bids.tsv_scans import BidsTsvScan, read_bids_scans_tsv_file -from lib.imaging_lib.nifti import find_dir_nifti_names +from lib.imaging_lib.nifti import find_dir_nifti_files from lib.util.fs import replace_file_extension, search_dir_file_with_regex from lib.util.iter import find @@ -18,10 +18,10 @@ class BIDSDataset: - path: str + path: Path validate: bool - def __init__(self, bids_path: str, validate: bool): + def __init__(self, bids_path: Path, validate: bool): self.path = bids_path self.validate = validate @@ -48,12 +48,12 @@ def subjects(self) -> list['BIDSSubject']: subjects: 
list[BIDSSubject] = [] - for file in os.scandir(self.path): + for file in self.path.iterdir(): subject_match = re.match(r'sub-([a-zA-Z0-9]+)', file.name) if subject_match is None: continue - if not os.path.isdir(file): + if not file.is_dir(): continue subject_label = subject_match.group(1) @@ -68,8 +68,8 @@ def get_dataset_description(self) -> 'BidsDatasetDescription | None': does contains incorrect data. """ - dataset_description_path = os.path.join(self.path, 'dataset_description.json') - if not os.path.exists(dataset_description_path): + dataset_description_path = self.path / 'dataset_description.json' + if not dataset_description_path.exists(): return None return BidsDatasetDescription(dataset_description_path) @@ -81,8 +81,8 @@ def tsv_participants(self) -> dict[str, BidsTsvParticipant] | None: present. This property might raise an exception if the file is present but incorrect. """ - tsv_participants_path = os.path.join(self.path, 'participants.tsv') - if not os.path.exists(tsv_participants_path): + tsv_participants_path = self.path / 'participants.tsv' + if not tsv_participants_path.exists(): return None return read_bids_participants_tsv_file(tsv_participants_path) @@ -143,13 +143,13 @@ def layout(self) -> BIDSLayout: class BIDSSubject: root_dataset: BIDSDataset + path: Path label: str - path: str def __init__(self, root_dataset: BIDSDataset, label: str): self.root_dataset = root_dataset self.label = label - self.path = os.path.join(self.root_dataset.path, f'sub-{self.label}') + self.path = self.root_dataset.path / f'sub-{self.label}' @property def data_types(self) -> Iterator['BIDSDataType']: @@ -169,8 +169,8 @@ def sessions(self) -> list['BIDSSession']: sessions: list[BIDSSession] = [] - for file in os.scandir(self.path): - if not os.path.isdir(file): + for file in self.path.iterdir(): + if not file.is_dir(): continue session_match = re.match(r'ses-([a-zA-Z0-9]+)', file.name) @@ -195,23 +195,19 @@ def get_session(self, session_label: str) -> 'BIDSSession | 
None': class BIDSSession: subject: BIDSSubject + path: Path label: str | None - path: str - tsv_scans_path: str | None + tsv_scans_path: Path | None def __init__(self, subject: BIDSSubject, label: str | None): self.subject = subject self.label = label - if label is None: - self.path = self.subject.path + if label is not None: + self.path = subject.path / f'ses-{self.label}' else: - self.path = os.path.join(self.subject.path, f'ses-{self.label}') + self.path = subject.path - tsv_scans_name = search_dir_file_with_regex(self.path, r'scans.tsv$') - if tsv_scans_name is not None: - self.tsv_scans_path = os.path.join(self.path, tsv_scans_name) - else: - self.tsv_scans_path = None + self.tsv_scans_path = search_dir_file_with_regex(self.path, r'scans.tsv$') @property def root_dataset(self) -> BIDSDataset: @@ -230,8 +226,8 @@ def data_types(self) -> list['BIDSDataType']: data_types: list[BIDSDataType] = [] - for file in os.scandir(self.path): - if not os.path.isdir(file): + for file in self.path.iterdir(): + if not file.is_dir(): continue data_types.append(BIDSDataType(self, file.name)) @@ -264,13 +260,15 @@ def get_tsv_scan(self, file_name: str) -> 'BidsTsvScan | None': class BIDSDataType: session: BIDSSession - name: str - path: str + path: Path def __init__(self, session: BIDSSession, name: str): self.session = session - self.name = name - self.path = os.path.join(self.session.path, self.name) + self.path = session.path / name + + @property + def name(self) -> str: + return self.path.name @property def root_dataset(self) -> BIDSDataset: @@ -288,22 +286,20 @@ def niftis(self) -> list['BIDSNifti']: niftis: list[BIDSNifti] = [] - for nifti_name in find_dir_nifti_names(self.path): - niftis.append(BIDSNifti(self, nifti_name)) + for nifti_path in find_dir_nifti_files(self.path): + niftis.append(BIDSNifti(self, nifti_path.name)) return niftis class BIDSNifti: data_type: BIDSDataType - name: str - path: str + path: Path suffix: str | None def __init__(self, data_type: 
BIDSDataType, name: str): self.data_type = data_type - self.path = os.path.join(self.data_type.path, name) - self.name = name + self.path = data_type.path / name suffix_match = re.search(r'_([a-zA-Z0-9]+)\.nii(\.gz)?$', self.name) if suffix_match is not None: @@ -311,6 +307,10 @@ def __init__(self, data_type: BIDSDataType, name: str): else: self.suffix = None + @property + def name(self) -> str: + return self.path.name + @property def root_dataset(self) -> BIDSDataset: return self.data_type.root_dataset @@ -323,38 +323,38 @@ def subject(self) -> BIDSSubject: def session(self) -> BIDSSession: return self.data_type.session - def get_json_path(self) -> str | None: + def get_json_path(self) -> Path | None: """ Get the JSON sidecar file path of this NIfTI file if it exists. """ json_name = replace_file_extension(self.name, 'json') - json_path = os.path.join(self.data_type.path, json_name) - if not os.path.exists(json_path): + json_path = self.data_type.path / json_name + if not json_path.exists(): return None return json_path - def get_bval_path(self) -> str | None: + def get_bval_path(self) -> Path | None: """ Get the BVAL file path of this NIfTI file if it exists. """ bval_name = replace_file_extension(self.name, 'bval') - bval_path = os.path.join(self.data_type.path, bval_name) - if not os.path.exists(bval_path): + bval_path = self.data_type.path / bval_name + if not bval_path.exists(): return None return bval_path - def get_bvec_path(self) -> str | None: + def get_bvec_path(self) -> Path | None: """ Get the BVEC file path of this NIfTI file if it exists. 
""" bvec_name = replace_file_extension(self.name, 'bvec') - bvec_path = os.path.join(self.data_type.path, bvec_name) - if not os.path.exists(bvec_path): + bvec_path = self.data_type.path / bvec_name + if not bvec_path.exists(): return None return bvec_path diff --git a/python/lib/imaging_lib/bids/dataset_description.py b/python/lib/imaging_lib/bids/dataset_description.py index 15c1b8a0b..e9ee520ff 100644 --- a/python/lib/imaging_lib/bids/dataset_description.py +++ b/python/lib/imaging_lib/bids/dataset_description.py @@ -1,4 +1,5 @@ import json +from pathlib import Path from typing import Any @@ -31,7 +32,7 @@ class BidsDatasetDescription: The BIDS dataset description JSON data. """ - def __init__(self, dataset_descrption_path: str): + def __init__(self, dataset_descrption_path: Path): """ Read a BIDS dataset description file, or raise an exception if that file contains incorrect data. diff --git a/python/lib/imaging_lib/bids/json.py b/python/lib/imaging_lib/bids/json.py index 3ecfa9184..b04e57f4d 100644 --- a/python/lib/imaging_lib/bids/json.py +++ b/python/lib/imaging_lib/bids/json.py @@ -1,4 +1,5 @@ import json +from pathlib import Path from typing import Any from lib.config import get_patient_id_dicom_header_config @@ -41,7 +42,7 @@ def get_bids_json_session_info(env: Env, bids_json: dict[str, Any]) -> SessionIn return get_session_info(env, patient_id, scanner_info) -def add_bids_json_file_parameters(env: Env, bids_json_path: str, rel_json_path: str, file_parameters: dict[str, Any]): +def add_bids_json_file_parameters(env: Env, bids_json_path: Path, rel_json_path: Path, file_parameters: dict[str, Any]): """ Read a BIDS JSON sidecar file and add its parameters to a LORIS file parameters dictionary. 
""" @@ -52,5 +53,5 @@ def add_bids_json_file_parameters(env: Env, bids_json_path: str, rel_json_path: json_blake2 = compute_file_blake2b_hash(bids_json_path) - file_parameters['bids_json_file'] = rel_json_path + file_parameters['bids_json_file'] = str(rel_json_path) file_parameters['bids_json_file_blake2b_hash'] = json_blake2 diff --git a/python/lib/imaging_lib/bids/tsv_participants.py b/python/lib/imaging_lib/bids/tsv_participants.py index f0deafe81..6a4c3ab72 100644 --- a/python/lib/imaging_lib/bids/tsv_participants.py +++ b/python/lib/imaging_lib/bids/tsv_participants.py @@ -1,6 +1,7 @@ import csv import re from dataclasses import dataclass +from pathlib import Path from dateutil.parser import ParserError, parse @@ -21,7 +22,7 @@ class BidsTsvParticipant: project: str | None = None -def read_bids_participants_tsv_file(participants_tsv_path: str) -> dict[str, BidsTsvParticipant]: +def read_bids_participants_tsv_file(participants_tsv_path: Path) -> dict[str, BidsTsvParticipant]: """ Read the `participants.tsv` file of a BIDS dataset and get the participant rows indexed by participant ID. Raise an exception if the `participants.tsv` file is incorrect. @@ -42,7 +43,7 @@ def read_bids_participants_tsv_file(participants_tsv_path: str) -> dict[str, Bid def read_bids_participants_tsv_row( tsv_participant_row: dict[str, str], - participants_tsv_path: str, + participants_tsv_path: Path, ) -> BidsTsvParticipant: """ Read a `participants.tsv` row, or raise an exception if that row is incorrect. @@ -70,7 +71,7 @@ def read_bids_participants_tsv_row( ) -def write_bids_participants_tsv_file(tsv_participants: dict[str, BidsTsvParticipant], participants_file_path: str): +def write_bids_participants_tsv_file(tsv_participants: dict[str, BidsTsvParticipant], participants_file_path: Path): """ Write the `participants.tsv` file based from a set of participant rows. 
""" diff --git a/python/lib/imaging_lib/bids/tsv_scans.py b/python/lib/imaging_lib/bids/tsv_scans.py index 82c319de8..052332261 100644 --- a/python/lib/imaging_lib/bids/tsv_scans.py +++ b/python/lib/imaging_lib/bids/tsv_scans.py @@ -1,6 +1,7 @@ import csv from dataclasses import dataclass from datetime import datetime +from pathlib import Path from typing import Any from dateutil.parser import ParserError, parse @@ -19,7 +20,7 @@ class BidsTsvScan: age_at_scan : str | None -def read_bids_scans_tsv_file(scans_tsv_path: str) -> dict[str, BidsTsvScan]: +def read_bids_scans_tsv_file(scans_tsv_path: Path) -> dict[str, BidsTsvScan]: """ Read a `scans.tsv` file of a BIDS dataset and get the scan rows indexed by file name. Raise an exception if the `scans.tsv` file is incorrect. @@ -38,7 +39,7 @@ def read_bids_scans_tsv_file(scans_tsv_path: str) -> dict[str, BidsTsvScan]: return tsv_scans -def read_bids_scans_tsv_row(tsv_scan_row: dict[str, str], scans_tsv_path: str) -> BidsTsvScan: +def read_bids_scans_tsv_row(tsv_scan_row: dict[str, str], scans_tsv_path: Path) -> BidsTsvScan: """ Read a `scans.tsv` row, or raise an exception if that row is incorrect. """ @@ -57,7 +58,7 @@ def read_bids_scans_tsv_row(tsv_scan_row: dict[str, str], scans_tsv_path: str) - ) -def write_bids_scans_tsv_file(tsv_scans: dict[str, BidsTsvScan], scans_tsv_path: str): +def write_bids_scans_tsv_file(tsv_scans: dict[str, BidsTsvScan], scans_tsv_path: Path): """ Write the `scans.tsv` file from a set of scan rows. """ @@ -115,7 +116,7 @@ def _read_age_at_scan(tsv_scan_row: dict[str, str]) -> str | None: return None -def add_scan_tsv_file_parameters(scan_tsv: BidsTsvScan, scans_tsv_path: str, file_parameters: dict[str, Any]): +def add_scan_tsv_file_parameters(scan_tsv: BidsTsvScan, scans_tsv_path: Path, file_parameters: dict[str, Any]): """ Add a scans.tsv file and row parameters to a LORIS file parameters dictionary. 
""" diff --git a/python/lib/imaging_lib/nifti.py b/python/lib/imaging_lib/nifti.py index 3a93f1109..0edb76897 100644 --- a/python/lib/imaging_lib/nifti.py +++ b/python/lib/imaging_lib/nifti.py @@ -1,11 +1,11 @@ -import os from collections.abc import Iterator +from pathlib import Path from typing import Any, cast import nibabel as nib -def add_nifti_file_parameters(nifti_path: str, nifti_file_hash: str, file_parameters: dict[str, Any]): +def add_nifti_file_parameters(nifti_path: Path, nifti_file_hash: str, file_parameters: dict[str, Any]): """ Read a NIfTI image and add some of its properties to the file parameters. """ @@ -34,11 +34,11 @@ def add_nifti_file_parameters(nifti_path: str, nifti_file_hash: str, file_parame file_parameters['file_blake2b_hash'] = nifti_file_hash -def find_dir_nifti_names(dir_path: str) -> Iterator[str]: +def find_dir_nifti_files(dir_path: Path) -> Iterator[Path]: """ - Iterate over the names of the NIfTI files found in a directory. + Iterate over the Path objects of the NIfTI files found in a directory. """ - for file_name in os.listdir(dir_path): - if file_name.endswith(('.nii', '.nii.gz')): - yield file_name + for item_path in dir_path.iterdir(): + if item_path.name.endswith(('.nii', '.nii.gz')): + yield item_path diff --git a/python/lib/import_bids_dataset/env.py b/python/lib/import_bids_dataset/env.py index 1db2491f5..85b98df8e 100644 --- a/python/lib/import_bids_dataset/env.py +++ b/python/lib/import_bids_dataset/env.py @@ -1,4 +1,5 @@ from dataclasses import dataclass +from pathlib import Path @dataclass @@ -7,15 +8,15 @@ class BIDSImportEnv: Pipeline-specific variables of the BIDS dataset import pipeline. 
""" - data_dir_path : str - loris_bids_path : str | None + data_dir_path : Path + loris_bids_path : Path | None total_files_count : int imported_files_count : int ignored_files_count : int failed_files_count : int unknown_scan_types : list[str] - def __init__(self, data_dir_path: str, loris_bids_path: str | None, total_files_count: int): + def __init__(self, data_dir_path: Path, loris_bids_path: Path | None, total_files_count: int): self.data_dir_path = data_dir_path self.loris_bids_path = loris_bids_path self.total_files_count = total_files_count diff --git a/python/lib/import_bids_dataset/events.py b/python/lib/import_bids_dataset/events.py index 5d72d0d3d..b705683b9 100644 --- a/python/lib/import_bids_dataset/events.py +++ b/python/lib/import_bids_dataset/events.py @@ -1,5 +1,6 @@ import json import os +from pathlib import Path from typing import Any import lib.utilities @@ -17,7 +18,7 @@ def get_events_metadata( args: Args, bids: BIDSDataset, legacy_db: Database, - loris_bids_path: str | None, + loris_bids_path: Path | None, project_id: int, ) -> dict[Any, Any]: """ diff --git a/python/lib/import_bids_dataset/main.py b/python/lib/import_bids_dataset/main.py index 0d57236d7..5a723eaae 100644 --- a/python/lib/import_bids_dataset/main.py +++ b/python/lib/import_bids_dataset/main.py @@ -1,6 +1,7 @@ import os import re import shutil +from pathlib import Path from typing import Any from lib.config import get_data_dir_path_config, get_default_bids_visit_label_config @@ -46,11 +47,11 @@ def import_bids_dataset(env: Env, args: Args, legacy_db: Database): Read the provided BIDS dataset and import it into LORIS. 
""" - data_dir_path = get_data_dir_path_config(env) + data_dir_path = Path(get_data_dir_path_config(env)) log(env, "Parsing BIDS dataset...") - bids = BIDSDataset(args.source_bids_path, args.bids_validation) + bids = BIDSDataset(Path(args.source_bids_path), args.bids_validation) niftis_count = count(bids.niftis) @@ -85,14 +86,14 @@ def import_bids_dataset(env: Env, args: Args, legacy_db: Database): # Copy the `participants.tsv` file rows. if loris_bids_path is not None and bids.tsv_participants is not None: - loris_participants_tsv_path = os.path.join(loris_bids_path, 'participants.tsv') + loris_participants_tsv_path = loris_bids_path / 'participants.tsv' copy_bids_tsv_participants(bids.tsv_participants, loris_participants_tsv_path) # Process each session directory. import_env = BIDSImportEnv( - data_dir_path = data_dir_path, - loris_bids_path = loris_bids_path, + data_dir_path = data_dir_path, + loris_bids_path = loris_bids_path, total_files_count = niftis_count, ) @@ -143,11 +144,11 @@ def import_bids_session( tsv_scans = bids_session.tsv_scans if import_env.loris_bids_path is not None and tsv_scans is not None: - loris_scans_tsv_path = os.path.join( - import_env.loris_bids_path, - f'sub-{bids_session.subject.label}', - f'ses-{bids_session.label}', - f'sub-{bids_session.subject.label}_ses-{bids_session.label}_scans.tsv', + loris_scans_tsv_path = ( + import_env.loris_bids_path + / f'sub-{bids_session.subject.label}' + / f'ses-{bids_session.label}' + / f'sub-{bids_session.subject.label}_ses-{bids_session.label}_scans.tsv' ) copy_bids_tsv_scans(tsv_scans, loris_scans_tsv_path) @@ -239,40 +240,40 @@ def import_bids_eeg_data_type_files( data_type = data_type, db = legacy_db, verbose = env.verbose, - data_dir = import_env.data_dir_path, + data_dir = str(import_env.data_dir_path), session = session, loris_bids_eeg_rel_dir = loris_data_type_dir_rel_path, - loris_bids_root_dir = import_env.loris_bids_path, + loris_bids_root_dir = str(import_env.loris_bids_path), 
dataset_tag_dict = events_metadata, dataset_type = args.type, ) -def copy_bids_tsv_participants(tsv_participants: dict[str, BidsTsvParticipant], loris_participants_tsv_path: str): +def copy_bids_tsv_participants(tsv_participants: dict[str, BidsTsvParticipant], loris_participants_tsv_path: Path): """ Copy some participants.tsv rows into the LORIS participants.tsv file, creating it if necessary. """ - if os.path.exists(loris_participants_tsv_path): + if loris_participants_tsv_path.exists(): loris_tsv_participants = read_bids_participants_tsv_file(loris_participants_tsv_path) merge_bids_tsv_participants(tsv_participants, loris_tsv_participants) write_bids_participants_tsv_file(tsv_participants, loris_participants_tsv_path) -def copy_bids_tsv_scans(tsv_scans: dict[str, BidsTsvScan], loris_scans_tsv_path: str): +def copy_bids_tsv_scans(tsv_scans: dict[str, BidsTsvScan], loris_scans_tsv_path: Path): """ Copy some scans.tsv rows into a LORIS scans.tsv file, creating it if necessary. """ - if os.path.exists(loris_scans_tsv_path): + if loris_scans_tsv_path.exists(): loris_tsv_scans = read_bids_scans_tsv_file(loris_scans_tsv_path) merge_bids_tsv_scans(tsv_scans, loris_tsv_scans) write_bids_scans_tsv_file(tsv_scans, loris_scans_tsv_path) -def copy_static_dataset_files(source_bids_path: str, loris_bids_path: str): +def copy_static_dataset_files(source_bids_path: Path, loris_bids_path: Path): """ Copy the static files of the source BIDS dataset to the LORIS BIDS dataset. """ @@ -286,7 +287,7 @@ def copy_static_dataset_files(source_bids_path: str, loris_bids_path: str): shutil.copyfile(source_file_path, loris_file_path) -def get_loris_bids_path(env: Env, bids: BIDSDataset, data_dir_path: str) -> str: +def get_loris_bids_path(env: Env, bids: BIDSDataset, data_dir_path: Path) -> Path: """ Get the LORIS BIDS directory path for the BIDS dataset to import, and create that directory if it does not exist yet. 
@@ -307,9 +308,9 @@ def get_loris_bids_path(env: Env, bids: BIDSDataset, data_dir_path: str) -> str: dataset_name = re.sub(r'[^0-9a-zA-Z]+', '_', dataset_description.name) dataset_version = re.sub(r'[^0-9a-zA-Z\.]+', '_', dataset_description.bids_version) - loris_bids_path = os.path.join(data_dir_path, 'bids_imports', f'{dataset_name}_BIDSVersion_{dataset_version}') + loris_bids_path = data_dir_path / 'bids_imports' / f'{dataset_name}_BIDSVersion_{dataset_version}' - if not os.path.exists(loris_bids_path): - os.mkdir(loris_bids_path) + if not loris_bids_path.exists(): + loris_bids_path.mkdir() return loris_bids_path diff --git a/python/lib/import_bids_dataset/mri.py b/python/lib/import_bids_dataset/mri.py index b5b9a7080..0737dd42a 100644 --- a/python/lib/import_bids_dataset/mri.py +++ b/python/lib/import_bids_dataset/mri.py @@ -1,5 +1,5 @@ -import os import shutil +from pathlib import Path from typing import Any, cast from lib.db.models.mri_scan_type import DbMriScanType @@ -59,20 +59,21 @@ def import_bids_nifti(env: Env, import_env: BIDSImportEnv, session: DbSession, n # Get the path at which to copy the file. - loris_file_dir_path = os.path.join( - cast(str, import_env.loris_bids_path), - f'sub-{session.candidate.psc_id}', - f'ses-{session.visit_label}', - nifti.data_type.name, + loris_file_dir_path = ( + # The LORIS BIDS path should not be `None` since `--no-copy` is not supported for MRI acquisitions yet. + cast(Path, import_env.loris_bids_path) + / f'sub-{session.candidate.psc_id}' + / f'ses-{session.visit_label}' + / nifti.data_type.name ) - loris_file_path = os.path.join(loris_file_dir_path, nifti.name) + loris_file_path = loris_file_dir_path / nifti.name - loris_file_rel_path = os.path.relpath(loris_file_path, import_env.data_dir_path) + loris_file_rel_path = loris_file_path.relative_to(import_env.data_dir_path) # Check whether the file is already registered in LORIS. 
- loris_file = try_get_file_with_rel_path(env.db, loris_file_rel_path) + loris_file = try_get_file_with_rel_path(env.db, str(loris_file_rel_path)) if loris_file is not None: import_env.ignored_files_count += 1 log(env, f"File '{loris_file_rel_path}' is already registered in LORIS. Skipping.") @@ -86,7 +87,7 @@ def import_bids_nifti(env: Env, import_env: BIDSImportEnv, session: DbSession, n # Get the auxiliary files. - aux_file_paths: list[str] = [] + aux_file_paths: list[Path] = [] json_path = nifti.get_json_path() @@ -103,8 +104,8 @@ def import_bids_nifti(env: Env, import_env: BIDSImportEnv, session: DbSession, n file_parameters: dict[str, Any] = {} if json_path is not None: - json_loris_path = os.path.join(loris_file_dir_path, os.path.basename(json_path)) - json_loris_rel_path = os.path.relpath(json_loris_path, import_env.data_dir_path) + json_loris_path = loris_file_dir_path / json_path.name + json_loris_rel_path = json_loris_path.relative_to(import_env.data_dir_path) add_bids_json_file_parameters(env, json_path, json_loris_rel_path, file_parameters) add_nifti_file_parameters(nifti.path, file_hash, file_parameters) @@ -113,11 +114,11 @@ def import_bids_nifti(env: Env, import_env: BIDSImportEnv, session: DbSession, n add_scan_tsv_file_parameters(tsv_scan, nifti.session.tsv_scans_path, file_parameters) for aux_file_path in aux_file_paths: - aux_file_type = get_file_extension(aux_file_path) + aux_file_type = get_file_extension(aux_file_path.name) aux_file_hash = compute_file_blake2b_hash(aux_file_path) - aux_file_loris_path = os.path.join(loris_file_dir_path, os.path.basename(aux_file_path)) - aux_file_loris_rel_path = os.path.relpath(aux_file_loris_path, import_env.data_dir_path) - file_parameters[f'bids_{aux_file_type}'] = aux_file_loris_rel_path + aux_file_loris_path = loris_file_dir_path / aux_file_path.name + aux_file_loris_rel_path = aux_file_loris_path.relative_to(import_env.data_dir_path) + file_parameters[f'bids_{aux_file_type}'] = 
str(aux_file_loris_rel_path) file_parameters[f'bids_{aux_file_type}_blake2b_hash'] = aux_file_hash # Copy the files on the file system. @@ -139,7 +140,7 @@ def import_bids_nifti(env: Env, import_env: BIDSImportEnv, session: DbSession, n file = register_imaging_file( env, file_type, - loris_file_rel_path, + str(loris_file_rel_path), session, mri_scan_type, echo_time, @@ -209,16 +210,15 @@ def get_nifti_mri_scan_type(env: Env, import_env: BIDSImportEnv, nifti: BIDSNift return create_mri_scan_type(env, nifti.suffix) -def copy_bids_file(loris_file_dir_path: str, file_path: str): +def copy_bids_file(loris_file_dir_path: Path, file_path: Path): """ Copy a BIDS file to a directory. """ - file_name = os.path.basename(file_path) - loris_file_path = os.path.join(loris_file_dir_path, file_name) + loris_file_path = loris_file_dir_path / file_path.name - if os.path.exists(loris_file_path): + if loris_file_path.exists(): raise Exception(f"File '{loris_file_path}' already exists in LORIS.") - os.makedirs(loris_file_dir_path, exist_ok=True) + loris_file_dir_path.mkdir(parents=True, exist_ok=True) shutil.copyfile(file_path, loris_file_path) diff --git a/python/lib/util/crypto.py b/python/lib/util/crypto.py index 72a790512..84c25910c 100644 --- a/python/lib/util/crypto.py +++ b/python/lib/util/crypto.py @@ -1,7 +1,8 @@ import hashlib +from pathlib import Path -def compute_file_blake2b_hash(file_path: str) -> str: +def compute_file_blake2b_hash(file_path: Path | str) -> str: """ Compute the BLAKE2b hash of a file. """ @@ -15,7 +16,7 @@ return hash.hexdigest() -def compute_file_md5_hash(file_path: str) -> str: +def compute_file_md5_hash(file_path: Path | str) -> str: """ Compute the MD5 hash of a file.
""" diff --git a/python/lib/util/fs.py b/python/lib/util/fs.py index 126a7c217..a5193b272 100644 --- a/python/lib/util/fs.py +++ b/python/lib/util/fs.py @@ -5,6 +5,7 @@ import tempfile from collections.abc import Iterator from datetime import datetime +from pathlib import Path import lib.exitcode from lib.env import Env @@ -103,14 +104,14 @@ def replace_file_extension(file_name: str, extension: str) -> str: return f'{parts[0]}.{extension}' -def search_dir_file_with_regex(dir_path: str, regex: str) -> str | None: +def search_dir_file_with_regex(dir_path: Path, regex: str) -> Path | None: """ - Search for a file within a directory whose name matches a regular expression, or return `None` - if no such file is found. + Search for a file or directory within a directory whose name matches a regular expression, or + return `None` if no such file is found. """ - for file in os.scandir(dir_path): - if re.search(regex, file.name): - return file.name + for file_path in dir_path.iterdir(): + if re.search(regex, file_path.name): + return file_path return None From eb855707cea1d177ef7274eaf4eca01e16259c4e Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Sun, 16 Nov 2025 11:56:12 +0000 Subject: [PATCH 10/12] add specialized data types --- python/lib/eeg.py | 4 +- python/lib/imaging_lib/bids/dataset.py | 54 ++++++++++++++++++++------ python/lib/import_bids_dataset/main.py | 19 ++++----- 3 files changed, 55 insertions(+), 22 deletions(-) diff --git a/python/lib/eeg.py b/python/lib/eeg.py index ccdb3e313..279e836e6 100644 --- a/python/lib/eeg.py +++ b/python/lib/eeg.py @@ -14,7 +14,7 @@ from lib.database_lib.physiological_modality import PhysiologicalModality from lib.database_lib.physiological_output_type import PhysiologicalOutputType from lib.db.models.session import DbSession -from lib.imaging_lib.bids.dataset import BIDSDataType +from lib.imaging_lib.bids.dataset import BIDSEEGDataType from lib.physiological import Physiological from lib.util.crypto import 
compute_file_blake2b_hash @@ -26,7 +26,7 @@ class Eeg: """ def __init__( - self, data_type: BIDSDataType, session: DbSession, db: Database, verbose: bool, data_dir: str, + self, data_type: BIDSEEGDataType, session: DbSession, db: Database, verbose: bool, data_dir: str, loris_bids_eeg_rel_dir: str, loris_bids_root_dir: str | None, dataset_tag_dict: dict[Any, Any], dataset_type: Literal['raw', 'derivative'] | None, ): diff --git a/python/lib/imaging_lib/bids/dataset.py b/python/lib/imaging_lib/bids/dataset.py index f1fbd3a29..a0f7fb1b7 100644 --- a/python/lib/imaging_lib/bids/dataset.py +++ b/python/lib/imaging_lib/bids/dataset.py @@ -38,7 +38,8 @@ def data_types(self) -> Iterator['BIDSDataType']: @property def niftis(self) -> Iterator['BIDSNifti']: for data_type in self.data_types: - yield from data_type.niftis + if isinstance(data_type, BIDSMRIDataType): + yield from data_type.niftis @cached_property def subjects(self) -> list['BIDSSubject']: @@ -159,7 +160,8 @@ def data_types(self) -> Iterator['BIDSDataType']: @property def niftis(self) -> Iterator['BIDSNifti']: for data_type in self.data_types: - yield from data_type.niftis + if isinstance(data_type, BIDSMRIDataType): + yield from data_type.niftis @cached_property def sessions(self) -> list['BIDSSession']: @@ -215,25 +217,48 @@ def root_dataset(self) -> BIDSDataset: @property def niftis(self) -> Iterator['BIDSNifti']: - for data_type in self.data_types: + for data_type in self.mri_data_types: yield from data_type.niftis @cached_property - def data_types(self) -> list['BIDSDataType']: + def mri_data_types(self) -> list['BIDSMRIDataType']: """ - The data type directories found in this session directory. + The MRI data type directories found in this session directory. 
""" - data_types: list[BIDSDataType] = [] - for file in self.path.iterdir(): - if not file.is_dir(): - continue + data_types: list[BIDSMRIDataType] = [] + for data_type_name in ['anat', 'dwi', 'fmap', 'func']: + data_type_path = self.path / data_type_name + if data_type_path.is_dir(): + data_types.append(BIDSMRIDataType(self, data_type_name)) + + return data_types + + @cached_property + def eeg_data_types(self) -> list['BIDSEEGDataType']: + """ + The EEG data type directories found in this session directory. + """ + + data_types: list[BIDSEEGDataType] = [] - data_types.append(BIDSDataType(self, file.name)) + for data_type_name in ['eeg', 'ieeg']: + data_type_path = self.path / data_type_name + if data_type_path.is_dir(): + data_types.append(BIDSEEGDataType(self, data_type_name)) return data_types + @property + def data_types(self) -> Iterator['BIDSDataType']: + """ + The data type directories found in this session directory. + """ + + yield from self.mri_data_types + yield from self.eeg_data_types + @cached_property def tsv_scans(self) -> dict[str, BidsTsvScan] | None: """ @@ -278,10 +303,17 @@ def root_dataset(self) -> BIDSDataset: @property def subject(self) -> BIDSSubject: return self.session.subject + +# TODO: Complete with EEG-specific content. +class BIDSEEGDataType(BIDSDataType): + pass + + +class BIDSMRIDataType(BIDSDataType): @cached_property def niftis(self) -> list['BIDSNifti']: """ - The NIfTI files found in this data type directory. + The NIfTI files found in this MRI data type directory.
""" niftis: list[BIDSNifti] = [] diff --git a/python/lib/import_bids_dataset/main.py b/python/lib/import_bids_dataset/main.py index 5a723eaae..9b528612e 100644 --- a/python/lib/import_bids_dataset/main.py +++ b/python/lib/import_bids_dataset/main.py @@ -11,7 +11,7 @@ from lib.db.queries.session import try_get_session_with_cand_id_visit_label from lib.eeg import Eeg from lib.env import Env -from lib.imaging_lib.bids.dataset import BIDSDataset, BIDSDataType, BIDSSession +from lib.imaging_lib.bids.dataset import BIDSDataset, BIDSDataType, BIDSEEGDataType, BIDSMRIDataType, BIDSSession from lib.imaging_lib.bids.dataset_description import BidsDatasetDescriptionError from lib.imaging_lib.bids.tsv_participants import ( BidsTsvParticipant, @@ -177,12 +177,13 @@ def import_bids_data_type_files( Read the provided BIDS data type directory and import it into LORIS. """ - if data_type.name in BIDS_MRI_DATA_TYPES: - import_bids_mri_data_type_files(env, import_env, args, session, data_type) - elif data_type.name in BIDS_EEG_DATA_TYPES: - import_bids_eeg_data_type_files(env, import_env, args, session, data_type, events_metadata, legacy_db) - else: - log_warning(env, f"Unknown data type '{data_type.name}'. Skipping.") + match data_type: + case BIDSMRIDataType(): + import_bids_mri_data_type_files(env, import_env, args, session, data_type) + case BIDSEEGDataType(): + import_bids_eeg_data_type_files(env, import_env, args, session, data_type, events_metadata, legacy_db) + case _: + log_warning(env, f"Unknown data type '{data_type.name}'. Skipping.") def import_bids_mri_data_type_files( @@ -190,7 +191,7 @@ def import_bids_mri_data_type_files( import_env: BIDSImportEnv, args: Args, session: DbSession, - data_type: BIDSDataType, + data_type: BIDSMRIDataType, ): """ Read the BIDS MRI data type directory and import its files into LORIS. 
@@ -222,7 +223,7 @@ def import_bids_eeg_data_type_files( import_env: BIDSImportEnv, args: Args, session: DbSession, - data_type: BIDSDataType, + data_type: BIDSEEGDataType, events_metadata: dict[Any, Any], legacy_db: Database, ): From 80657646b8ef0fc26bc55c3976abf127c8536807 Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Sun, 16 Nov 2025 12:27:21 +0000 Subject: [PATCH 11/12] add modalities-specific bids modules --- python/lib/eeg.py | 2 +- python/lib/imaging_lib/bids/dataset.py | 103 +++------------------ python/lib/imaging_lib/bids/eeg/dataset.py | 6 ++ python/lib/imaging_lib/bids/mri/dataset.py | 90 ++++++++++++++++++ python/lib/import_bids_dataset/main.py | 4 +- python/lib/import_bids_dataset/mri.py | 2 +- 6 files changed, 114 insertions(+), 93 deletions(-) create mode 100644 python/lib/imaging_lib/bids/eeg/dataset.py create mode 100644 python/lib/imaging_lib/bids/mri/dataset.py diff --git a/python/lib/eeg.py b/python/lib/eeg.py index 279e836e6..ca7ca7d72 100644 --- a/python/lib/eeg.py +++ b/python/lib/eeg.py @@ -14,7 +14,7 @@ from lib.database_lib.physiological_modality import PhysiologicalModality from lib.database_lib.physiological_output_type import PhysiologicalOutputType from lib.db.models.session import DbSession -from lib.imaging_lib.bids.dataset import BIDSEEGDataType +from lib.imaging_lib.bids.eeg.dataset import BIDSEEGDataType from lib.physiological import Physiological from lib.util.crypto import compute_file_blake2b_hash diff --git a/python/lib/imaging_lib/bids/dataset.py b/python/lib/imaging_lib/bids/dataset.py index a0f7fb1b7..e920a93df 100644 --- a/python/lib/imaging_lib/bids/dataset.py +++ b/python/lib/imaging_lib/bids/dataset.py @@ -2,16 +2,21 @@ from collections.abc import Iterator from functools import cached_property from pathlib import Path +from typing import TYPE_CHECKING from bids import BIDSLayout from lib.imaging_lib.bids.dataset_description import BidsDatasetDescription from lib.imaging_lib.bids.tsv_participants import 
BidsTsvParticipant, read_bids_participants_tsv_file from lib.imaging_lib.bids.tsv_scans import BidsTsvScan, read_bids_scans_tsv_file -from lib.imaging_lib.nifti import find_dir_nifti_files -from lib.util.fs import replace_file_extension, search_dir_file_with_regex +from lib.util.fs import search_dir_file_with_regex from lib.util.iter import find +if TYPE_CHECKING: + from lib.imaging_lib.bids.eeg.dataset import BIDSEEGDataType + from lib.imaging_lib.bids.mri.dataset import BIDSMRIDataType, BIDSNifti + + PYBIDS_IGNORE = ['code', 'sourcedata', 'log', '.git'] PYBIDS_FORCE = [re.compile(r"_annotations\.(tsv|json)$")] @@ -37,6 +42,7 @@ def data_types(self) -> Iterator['BIDSDataType']: @property def niftis(self) -> Iterator['BIDSNifti']: + from lib.imaging_lib.bids.mri.dataset import BIDSMRIDataType for data_type in self.data_types: if isinstance(data_type, BIDSMRIDataType): yield from data_type.niftis @@ -159,6 +165,7 @@ def data_types(self) -> Iterator['BIDSDataType']: @property def niftis(self) -> Iterator['BIDSNifti']: + from lib.imaging_lib.bids.mri.dataset import BIDSMRIDataType for data_type in self.data_types: if isinstance(data_type, BIDSMRIDataType): yield from data_type.niftis @@ -226,6 +233,8 @@ def mri_data_types(self) -> list['BIDSMRIDataType']: The MRI data type directories found in this session directory. """ + from lib.imaging_lib.bids.mri.dataset import BIDSMRIDataType + data_types: list[BIDSMRIDataType] = [] for data_type_name in ['anat', 'dwi', 'fmap', 'func']: @@ -241,6 +250,8 @@ def eeg_data_types(self) -> list['BIDSEEGDataType']: The EEG data type directories found in this session directory. """ + from lib.imaging_lib.bids.eeg.dataset import BIDSEEGDataType + data_types: list[BIDSEEGDataType] = [] for data_type_name in ['eeg', 'ieeg']: @@ -302,91 +313,3 @@ def root_dataset(self) -> BIDSDataset: @property def subject(self) -> BIDSSubject: return self.session.subject
-class BIDSEEGDataType(BIDSDataType): - pass - - -class BIDSMRIDataType(BIDSDataType): - @cached_property - def niftis(self) -> list['BIDSNifti']: - """ - The NIfTI files found in this MRI data type directory. - """ - - niftis: list[BIDSNifti] = [] - - for nifti_path in find_dir_nifti_files(self.path): - niftis.append(BIDSNifti(self, nifti_path.name)) - - return niftis - - -class BIDSNifti: - data_type: BIDSDataType - path: Path - suffix: str | None - - def __init__(self, data_type: BIDSDataType, name: str): - self.data_type = data_type - self.path = data_type.path / name - - suffix_match = re.search(r'_([a-zA-Z0-9]+)\.nii(\.gz)?$', self.name) - if suffix_match is not None: - self.suffix = suffix_match.group(1) - else: - self.suffix = None - - @property - def name(self) -> str: - return self.path.name - - @property - def root_dataset(self) -> BIDSDataset: - return self.data_type.root_dataset - - @property - def subject(self) -> BIDSSubject: - return self.data_type.subject - - @property - def session(self) -> BIDSSession: - return self.data_type.session - - def get_json_path(self) -> Path | None: - """ - Get the JSON sidecar file path of this NIfTI file if it exists. - """ - - json_name = replace_file_extension(self.name, 'json') - json_path = self.data_type.path / json_name - if not json_path.exists(): - return None - - return json_path - - def get_bval_path(self) -> Path | None: - """ - Get the BVAL file path of this NIfTI file if it exists. - """ - - bval_name = replace_file_extension(self.name, 'bval') - bval_path = self.data_type.path / bval_name - if not bval_path.exists(): - return None - - return bval_path - - def get_bvec_path(self) -> Path | None: - """ - Get the BVEC file path of this NIfTI file if it exists. 
- """ - - bvec_name = replace_file_extension(self.name, 'bvec') - bvec_path = self.data_type.path / bvec_name - if not bvec_path.exists(): - return None - - return bvec_path diff --git a/python/lib/imaging_lib/bids/eeg/dataset.py b/python/lib/imaging_lib/bids/eeg/dataset.py new file mode 100644 index 000000000..c3f4227d5 --- /dev/null +++ b/python/lib/imaging_lib/bids/eeg/dataset.py @@ -0,0 +1,6 @@ +# TODO: Complete with EEG-specific content. +from lib.imaging_lib.bids.dataset import BIDSDataType + + +class BIDSEEGDataType(BIDSDataType): + pass diff --git a/python/lib/imaging_lib/bids/mri/dataset.py b/python/lib/imaging_lib/bids/mri/dataset.py new file mode 100644 index 000000000..18e9abb5d --- /dev/null +++ b/python/lib/imaging_lib/bids/mri/dataset.py @@ -0,0 +1,90 @@ +import re +from functools import cached_property +from pathlib import Path + +from lib.imaging_lib.bids.dataset import BIDSDataset, BIDSDataType, BIDSSession, BIDSSubject +from lib.imaging_lib.nifti import find_dir_nifti_files +from lib.util.fs import replace_file_extension + + +class BIDSMRIDataType(BIDSDataType): + @cached_property + def niftis(self) -> list['BIDSNifti']: + """ + The NIfTI files found in this MRI data type directory. 
+ """ + + niftis: list[BIDSNifti] = [] + + for nifti_path in find_dir_nifti_files(self.path): + niftis.append(BIDSNifti(self, nifti_path.name)) + + return niftis + + +class BIDSNifti: + data_type: BIDSDataType + path: Path + suffix: str | None + + def __init__(self, data_type: BIDSDataType, name: str): + self.data_type = data_type + self.path = data_type.path / name + + suffix_match = re.search(r'_([a-zA-Z0-9]+)\.nii(\.gz)?$', self.name) + if suffix_match is not None: + self.suffix = suffix_match.group(1) + else: + self.suffix = None + + @property + def name(self) -> str: + return self.path.name + + @property + def root_dataset(self) -> BIDSDataset: + return self.data_type.root_dataset + + @property + def subject(self) -> BIDSSubject: + return self.data_type.subject + + @property + def session(self) -> BIDSSession: + return self.data_type.session + + def get_json_path(self) -> Path | None: + """ + Get the JSON sidecar file path of this NIfTI file if it exists. + """ + + json_name = replace_file_extension(self.name, 'json') + json_path = self.data_type.path / json_name + if not json_path.exists(): + return None + + return json_path + + def get_bval_path(self) -> Path | None: + """ + Get the BVAL file path of this NIfTI file if it exists. + """ + + bval_name = replace_file_extension(self.name, 'bval') + bval_path = self.data_type.path / bval_name + if not bval_path.exists(): + return None + + return bval_path + + def get_bvec_path(self) -> Path | None: + """ + Get the BVEC file path of this NIfTI file if it exists. 
+ """ + + bvec_name = replace_file_extension(self.name, 'bvec') + bvec_path = self.data_type.path / bvec_name + if not bvec_path.exists(): + return None + + return bvec_path diff --git a/python/lib/import_bids_dataset/main.py b/python/lib/import_bids_dataset/main.py index 9b528612e..d887b618e 100644 --- a/python/lib/import_bids_dataset/main.py +++ b/python/lib/import_bids_dataset/main.py @@ -11,8 +11,10 @@ from lib.db.queries.session import try_get_session_with_cand_id_visit_label from lib.eeg import Eeg from lib.env import Env -from lib.imaging_lib.bids.dataset import BIDSDataset, BIDSDataType, BIDSEEGDataType, BIDSMRIDataType, BIDSSession +from lib.imaging_lib.bids.dataset import BIDSDataset, BIDSDataType, BIDSSession from lib.imaging_lib.bids.dataset_description import BidsDatasetDescriptionError +from lib.imaging_lib.bids.eeg.dataset import BIDSEEGDataType +from lib.imaging_lib.bids.mri.dataset import BIDSMRIDataType from lib.imaging_lib.bids.tsv_participants import ( BidsTsvParticipant, merge_bids_tsv_participants, diff --git a/python/lib/import_bids_dataset/mri.py b/python/lib/import_bids_dataset/mri.py index 0737dd42a..a22e951dd 100644 --- a/python/lib/import_bids_dataset/mri.py +++ b/python/lib/import_bids_dataset/mri.py @@ -7,8 +7,8 @@ from lib.db.queries.file import try_get_file_with_hash, try_get_file_with_rel_path from lib.db.queries.mri_scan_type import try_get_mri_scan_type_with_name from lib.env import Env -from lib.imaging_lib.bids.dataset import BIDSNifti from lib.imaging_lib.bids.json import add_bids_json_file_parameters +from lib.imaging_lib.bids.mri.dataset import BIDSNifti from lib.imaging_lib.bids.tsv_scans import add_scan_tsv_file_parameters from lib.imaging_lib.bids.util import determine_bids_file_type from lib.imaging_lib.file import register_imaging_file From 4acc9b76cc8d959a76fe73cac01ca4ec587d581e Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Sun, 16 Nov 2025 13:16:57 +0000 Subject: [PATCH 12/12] change nifti class to bids mri 
acquisiton --- python/lib/imaging_lib/bids/dataset.py | 8 +- python/lib/imaging_lib/bids/mri/dataset.py | 81 +++++++------------ python/lib/imaging_lib/nifti.py | 11 --- python/lib/import_bids_dataset/mri.py | 91 +++++++++++----------- python/lib/util/fs.py | 35 ++++++--- 5 files changed, 105 insertions(+), 121 deletions(-) diff --git a/python/lib/imaging_lib/bids/dataset.py b/python/lib/imaging_lib/bids/dataset.py index e920a93df..c4d457ccb 100644 --- a/python/lib/imaging_lib/bids/dataset.py +++ b/python/lib/imaging_lib/bids/dataset.py @@ -14,7 +14,7 @@ if TYPE_CHECKING: from lib.imaging_lib.bids.eeg.dataset import BIDSEEGDataType - from lib.imaging_lib.bids.mri.dataset import BIDSMRIDataType, BIDSNifti + from lib.imaging_lib.bids.mri.dataset import BIDSMRIAcquisition, BIDSMRIDataType PYBIDS_IGNORE = ['code', 'sourcedata', 'log', '.git'] @@ -41,7 +41,7 @@ def data_types(self) -> Iterator['BIDSDataType']: yield from session.data_types @property - def niftis(self) -> Iterator['BIDSNifti']: + def niftis(self) -> Iterator['BIDSMRIAcquisition']: from lib.imaging_lib.bids.mri.dataset import BIDSMRIDataType for data_type in self.data_types: if isinstance(data_type, BIDSMRIDataType): @@ -164,7 +164,7 @@ def data_types(self) -> Iterator['BIDSDataType']: yield from session.data_types @property - def niftis(self) -> Iterator['BIDSNifti']: + def niftis(self) -> Iterator['BIDSMRIAcquisition']: from lib.imaging_lib.bids.mri.dataset import BIDSMRIDataType for data_type in self.data_types: if isinstance(data_type, BIDSMRIDataType): @@ -223,7 +223,7 @@ def root_dataset(self) -> BIDSDataset: return self.subject.root_dataset @property - def niftis(self) -> Iterator['BIDSNifti']: + def niftis(self) -> Iterator['BIDSMRIAcquisition']: for data_type in self.mri_data_types: yield from data_type.niftis diff --git a/python/lib/imaging_lib/bids/mri/dataset.py b/python/lib/imaging_lib/bids/mri/dataset.py index 18e9abb5d..2b16cb33d 100644 --- a/python/lib/imaging_lib/bids/mri/dataset.py +++ 
b/python/lib/imaging_lib/bids/mri/dataset.py @@ -3,42 +3,53 @@ from pathlib import Path from lib.imaging_lib.bids.dataset import BIDSDataset, BIDSDataType, BIDSSession, BIDSSubject -from lib.imaging_lib.nifti import find_dir_nifti_files -from lib.util.fs import replace_file_extension +from lib.util.fs import remove_path_extension, replace_path_extension class BIDSMRIDataType(BIDSDataType): @cached_property - def niftis(self) -> list['BIDSNifti']: + def niftis(self) -> list['BIDSMRIAcquisition']: """ The NIfTI files found in this MRI data type directory. """ - niftis: list[BIDSNifti] = [] + acquisitions: list[BIDSMRIAcquisition] = [] - for nifti_path in find_dir_nifti_files(self.path): - niftis.append(BIDSNifti(self, nifti_path.name)) + for file_path in self.path.iterdir(): + if file_path.name.endswith(('.nii', '.nii.gz')): + acquisitions.append(BIDSMRIAcquisition(self, file_path)) - return niftis + return acquisitions -class BIDSNifti: +class BIDSMRIAcquisition: data_type: BIDSDataType path: Path + nifti_path: Path + sidecar_path: Path | None + bval_path: Path | None + bvec_path: Path | None suffix: str | None - def __init__(self, data_type: BIDSDataType, name: str): - self.data_type = data_type - self.path = data_type.path / name + def __init__(self, data_type: BIDSDataType, nifti_path: Path): + self.data_type = data_type + self.path = remove_path_extension(nifti_path) + self.nifti_path = data_type.path / nifti_path - suffix_match = re.search(r'_([a-zA-Z0-9]+)\.nii(\.gz)?$', self.name) - if suffix_match is not None: - self.suffix = suffix_match.group(1) - else: - self.suffix = None + sidecar_path = replace_path_extension(self.path, 'json') + self.sidecar_path = sidecar_path if sidecar_path.exists() else None + + bval_path = replace_path_extension(self.path, 'bval') + self.bval_path = bval_path if bval_path.exists() else None + + bvec_path = replace_path_extension(self.path, 'bvec') + self.bvec_path = bvec_path if bvec_path.exists() else None + + suffix_match = 
re.search(r'_([a-zA-Z0-9]+)$', self.name) + self.suffix = suffix_match.group(1) if suffix_match is not None else None @property - def name(self) -> str: + def name(self): return self.path.name @property @@ -52,39 +63,3 @@ def subject(self) -> BIDSSubject: @property def session(self) -> BIDSSession: return self.data_type.session - - def get_json_path(self) -> Path | None: - """ - Get the JSON sidecar file path of this NIfTI file if it exists. - """ - - json_name = replace_file_extension(self.name, 'json') - json_path = self.data_type.path / json_name - if not json_path.exists(): - return None - - return json_path - - def get_bval_path(self) -> Path | None: - """ - Get the BVAL file path of this NIfTI file if it exists. - """ - - bval_name = replace_file_extension(self.name, 'bval') - bval_path = self.data_type.path / bval_name - if not bval_path.exists(): - return None - - return bval_path - - def get_bvec_path(self) -> Path | None: - """ - Get the BVEC file path of this NIfTI file if it exists. - """ - - bvec_name = replace_file_extension(self.name, 'bvec') - bvec_path = self.data_type.path / bvec_name - if not bvec_path.exists(): - return None - - return bvec_path diff --git a/python/lib/imaging_lib/nifti.py b/python/lib/imaging_lib/nifti.py index 0edb76897..267b597e7 100644 --- a/python/lib/imaging_lib/nifti.py +++ b/python/lib/imaging_lib/nifti.py @@ -1,4 +1,3 @@ -from collections.abc import Iterator from pathlib import Path from typing import Any, cast @@ -32,13 +31,3 @@ def add_nifti_file_parameters(nifti_path: Path, nifti_file_hash: str, file_param # Add the file BLAKE2b hash. file_parameters['file_blake2b_hash'] = nifti_file_hash - - -def find_dir_nifti_files(dir_path: Path) -> Iterator[Path]: - """ - Iterate over the Path objects of the NIfTI files found in a directory. 
- """ - - for item_path in dir_path.iterdir(): - if item_path.name.endswith(('.nii', '.nii.gz')): - yield item_path diff --git a/python/lib/import_bids_dataset/mri.py b/python/lib/import_bids_dataset/mri.py index a22e951dd..1d45568c1 100644 --- a/python/lib/import_bids_dataset/mri.py +++ b/python/lib/import_bids_dataset/mri.py @@ -8,7 +8,7 @@ from lib.db.queries.mri_scan_type import try_get_mri_scan_type_with_name from lib.env import Env from lib.imaging_lib.bids.json import add_bids_json_file_parameters -from lib.imaging_lib.bids.mri.dataset import BIDSNifti +from lib.imaging_lib.bids.mri.dataset import BIDSMRIAcquisition from lib.imaging_lib.bids.tsv_scans import add_scan_tsv_file_parameters from lib.imaging_lib.bids.util import determine_bids_file_type from lib.imaging_lib.file import register_imaging_file @@ -19,7 +19,7 @@ from lib.import_bids_dataset.env import BIDSImportEnv from lib.logging import log, log_warning from lib.util.crypto import compute_file_blake2b_hash -from lib.util.fs import get_file_extension +from lib.util.fs import get_path_extension KNOWN_SUFFIXES_PER_MRI_DATA_TYPE = { 'anat': [ @@ -38,7 +38,7 @@ } -def import_bids_nifti(env: Env, import_env: BIDSImportEnv, session: DbSession, nifti: BIDSNifti): +def import_bids_nifti(env: Env, import_env: BIDSImportEnv, session: DbSession, acquisition: BIDSMRIAcquisition): """ Import a BIDS NIfTI file and its associated files in LORIS. """ @@ -46,16 +46,19 @@ def import_bids_nifti(env: Env, import_env: BIDSImportEnv, session: DbSession, n log( env, ( - f"Importing MRI file '{nifti.name}'... ({import_env.processed_files_count + 1}" + f"Importing MRI acquisition '{acquisition.name}'... ({import_env.processed_files_count + 1}" f" / {import_env.total_files_count})" ), ) # Get the relevant `scans.tsv` row if there is one. 
- tsv_scan = nifti.session.get_tsv_scan(nifti.name) + tsv_scan = acquisition.session.get_tsv_scan(acquisition.nifti_path.name) if tsv_scan is None: - log_warning(env, f"No scans.tsv row found for file '{nifti.name}', scans.tsv data will be ignored.") + log_warning( + env, + f"No scans.tsv row found for acquisition '{acquisition.name}', scans.tsv data will be ignored.", + ) # Get the path at which to copy the file. @@ -64,10 +67,10 @@ def import_bids_nifti(env: Env, import_env: BIDSImportEnv, session: DbSession, n cast(Path, import_env.loris_bids_path) / f'sub-{session.candidate.psc_id}' / f'ses-{session.visit_label}' - / nifti.data_type.name + / acquisition.data_type.name ) - loris_file_path = loris_file_dir_path / nifti.name + loris_file_path = loris_file_dir_path / acquisition.nifti_path.name loris_file_rel_path = loris_file_path.relative_to(import_env.data_dir_path) @@ -81,40 +84,36 @@ def import_bids_nifti(env: Env, import_env: BIDSImportEnv, session: DbSession, n # Get information about the file. - file_type = get_check_nifti_imaging_file_type(env, nifti) - file_hash = get_check_nifti_file_hash(env, nifti) - mri_scan_type = get_nifti_mri_scan_type(env, import_env, nifti) + file_type = get_check_nifti_imaging_file_type(env, acquisition) + file_hash = get_check_nifti_file_hash(env, acquisition) + mri_scan_type = get_nifti_mri_scan_type(env, import_env, acquisition) # Get the auxiliary files. aux_file_paths: list[Path] = [] - json_path = nifti.get_json_path() + if acquisition.bval_path is not None: + aux_file_paths.append(acquisition.bval_path) - bval_path = nifti.get_bval_path() - if bval_path is not None: - aux_file_paths.append(bval_path) - - bvec_path = nifti.get_bvec_path() - if bvec_path is not None: - aux_file_paths.append(bvec_path) + if acquisition.bvec_path is not None: + aux_file_paths.append(acquisition.bvec_path) # Get the file parameters. 
file_parameters: dict[str, Any] = {} - if json_path is not None: - json_loris_path = loris_file_dir_path / json_path.name + if acquisition.sidecar_path is not None: + json_loris_path = loris_file_dir_path / acquisition.sidecar_path.name json_loris_rel_path = json_loris_path.relative_to(import_env.data_dir_path) - add_bids_json_file_parameters(env, json_path, json_loris_rel_path, file_parameters) + add_bids_json_file_parameters(env, acquisition.sidecar_path, json_loris_rel_path, file_parameters) - add_nifti_file_parameters(nifti.path, file_hash, file_parameters) + add_nifti_file_parameters(acquisition.nifti_path, file_hash, file_parameters) - if nifti.session.tsv_scans_path is not None and tsv_scan is not None: - add_scan_tsv_file_parameters(tsv_scan, nifti.session.tsv_scans_path, file_parameters) + if acquisition.session.tsv_scans_path is not None and tsv_scan is not None: + add_scan_tsv_file_parameters(tsv_scan, acquisition.session.tsv_scans_path, file_parameters) for aux_file_path in aux_file_paths: - aux_file_type = get_file_extension(aux_file_path.name) + aux_file_type = get_path_extension(aux_file_path) aux_file_hash = compute_file_blake2b_hash(aux_file_path) aux_file_loris_path = loris_file_dir_path / aux_file_path.name aux_file_loris_rel_path = aux_file_loris_path.relative_to(import_env.data_dir_path) @@ -123,10 +122,10 @@ def import_bids_nifti(env: Env, import_env: BIDSImportEnv, session: DbSession, n # Copy the files on the file system. 
- copy_bids_file(loris_file_dir_path, nifti.path) + copy_bids_file(loris_file_dir_path, acquisition.nifti_path) - if json_path is not None: - copy_bids_file(loris_file_dir_path, json_path) + if acquisition.sidecar_path is not None: + copy_bids_file(loris_file_dir_path, acquisition.sidecar_path) for aux_file_path in aux_file_paths: copy_bids_file(loris_file_dir_path, aux_file_path) @@ -159,26 +158,26 @@ def import_bids_nifti(env: Env, import_env: BIDSImportEnv, session: DbSession, n import_env.imported_files_count += 1 -def get_check_nifti_imaging_file_type(env: Env, nifti: BIDSNifti) -> str: +def get_check_nifti_imaging_file_type(env: Env, acqusition: BIDSMRIAcquisition) -> str: """ Get the BIDS file type of a NIfTI file and raise an exception if that file type is not registered in the database. """ - file_type = determine_bids_file_type(env, nifti.name) + file_type = determine_bids_file_type(env, acqusition.nifti_path.name) if file_type is None: raise Exception("No matching file type found in the database.") return file_type -def get_check_nifti_file_hash(env: Env, nifti: BIDSNifti) -> str: +def get_check_nifti_file_hash(env: Env, acquisition: BIDSMRIAcquisition) -> str: """ Compute the BLAKE2b hash of a NIfTI file and raise an exception if that hash is already registered in the database. """ - file_hash = compute_file_blake2b_hash(nifti.path) + file_hash = compute_file_blake2b_hash(acquisition.nifti_path) file = try_get_file_with_hash(env.db, file_hash) if file is not None: @@ -187,27 +186,31 @@ def get_check_nifti_file_hash(env: Env, nifti: BIDSNifti) -> str: return file_hash -def get_nifti_mri_scan_type(env: Env, import_env: BIDSImportEnv, nifti: BIDSNifti) -> DbMriScanType | None: +def get_nifti_mri_scan_type( + env: Env, + import_env: BIDSImportEnv, + acquisition: BIDSMRIAcquisition, +) -> DbMriScanType | None: """ - Get the MRI scan type corresponding to a NIfTI file using its BIDS suffix. 
Create the MRI scan - type in the database the suffix is a standard BIDS suffix and the scan type does not already - exist in the database, or raise an exception if no known scan type is found. + Get the MRI scan type corresponding to a BIDS MRI acquisition using its BIDS suffix. Create the + MRI scan type in the database the suffix is a standard BIDS suffix and the scan type does not + already exist in the database, or raise an exception if no known scan type is found. """ - if nifti.suffix is None: + if acquisition.suffix is None: raise Exception("No BIDS suffix found in the NIfTI file name, cannot infer the file data type.") - mri_scan_type = try_get_mri_scan_type_with_name(env.db, nifti.suffix) + mri_scan_type = try_get_mri_scan_type_with_name(env.db, acquisition.suffix) if mri_scan_type is not None: return mri_scan_type - if nifti.suffix not in KNOWN_SUFFIXES_PER_MRI_DATA_TYPE[nifti.data_type.name]: - if nifti.suffix not in import_env.unknown_scan_types: - import_env.unknown_scan_types.append(nifti.suffix) + if acquisition.suffix not in KNOWN_SUFFIXES_PER_MRI_DATA_TYPE[acquisition.data_type.name]: + if acquisition.suffix not in import_env.unknown_scan_types: + import_env.unknown_scan_types.append(acquisition.suffix) - raise Exception(f"Found unknown MRI file suffix '{nifti.suffix}'.") + raise Exception(f"Found unknown MRI file suffix '{acquisition.suffix}'.") - return create_mri_scan_type(env, nifti.suffix) + return create_mri_scan_type(env, acquisition.suffix) def copy_bids_file(loris_file_dir_path: Path, file_path: Path): diff --git a/python/lib/util/fs.py b/python/lib/util/fs.py index a5193b272..4d56eb7ee 100644 --- a/python/lib/util/fs.py +++ b/python/lib/util/fs.py @@ -81,27 +81,44 @@ def remove_empty_directories(dir_path: str): os.rmdir(subdir_path) -def get_file_extension(file_name: str) -> str: +def get_path_stem(path: Path) -> str: """ - Get the extension (including multiple extensions) of a file name or path without the leading - dot. 
+ Get the stem of a path, that is, the name of the file without its extension (including multiple + extensions). """ - parts = file_name.split('.', maxsplit=1) + parts = path.name.split('.') + return parts[0] + + +def get_path_extension(path: Path) -> str: + """ + Get the extension (including multiple extensions) of a path without the leading dot. + """ + + parts = path.name.split('.', maxsplit=1) if len(parts) == 1: return '' return parts[1] -def replace_file_extension(file_name: str, extension: str) -> str: +def remove_path_extension(path: Path) -> Path: + """ + Remove the extension (including multiple extensions) of a path. + """ + + parts = path.name.split('.') + return path.parent / parts[0] + + +def replace_path_extension(path: Path, extension: str) -> Path: """ - Replace the extension (including multiple extensions) of a file name or path by another - extension. + Replace the extension (including multiple extensions) of a path by another extension. """ - parts = file_name.split('.') - return f'{parts[0]}.{extension}' + parts = path.name.split('.') + return path.parent / f'{parts[0]}.{extension}' def search_dir_file_with_regex(dir_path: Path, regex: str) -> Path | None: