diff --git a/pyproject.toml b/pyproject.toml index 068ac2b6b..385343434 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,15 +67,16 @@ include = [ "python/tests", "python/lib/db", "python/lib/imaging_lib", + "python/lib/import_bids_dataset", "python/lib/import_dicom_study", "python/lib/util", - "python/lib/bids.py", "python/lib/config.py", "python/lib/config_file.py", "python/lib/env.py", "python/lib/get_session_info.py", "python/lib/logging.py", "python/lib/make_env.py", + "python/scripts/import_bids_dataset.py", "python/scripts/import_dicom_study.py", "python/scripts/summarize_dicom_study.py", ] diff --git a/python/lib/bidsreader.py b/python/lib/bidsreader.py deleted file mode 100644 index bd7da2508..000000000 --- a/python/lib/bidsreader.py +++ /dev/null @@ -1,283 +0,0 @@ -"""Reads a BIDS structure into a data dictionary using bids.grabbids.""" - -import json -import re -import sys - -from bids import BIDSLayout - -import lib.exitcode -import lib.utilities as utilities - -# import bids -# BIDSLayoutIndexer is required for PyBIDS >= 0.12.1 -# bids_pack_version = list(map(int, bids.__version__.split('.'))) -# if (bids_pack_version[0] > 0 -# or bids_pack_version[1] > 12 -# or (bids_pack_version[1] == 12 and bids_pack_version[2] > 0)): - -# from bids import BIDSLayoutIndexer - - -class BidsReader: - """ - This class reads a BIDS structure into a data dictionary using BIDS grabbids. - This dictionary will then be used to determine what to register into the - database. - - :Example: - - from lib.bidsreader import BidsReader - - # load the BIDS directory - bids_reader = BidsReader(bids_dir) - """ - - def __init__(self, bids_dir, verbose, validate = True): - """ - Constructor method for the BidsReader class. 
- - :param bids_dir: path to the BIDS structure to read - :type bids_dir: str - :param verbose : boolean to print verbose information - :type verbose : bool - :param validate : boolean to validate the BIDS dataset - :type validate : bool - """ - - self.verbose = verbose - self.bids_dir = bids_dir - self.bids_layout = self.load_bids_data(validate) - - # load dataset name and BIDS version - self.dataset_name = None - self.bids_version = None - try: - dataset_json = bids_dir + "/dataset_description.json" - dataset_description = {} - with open(dataset_json) as json_file: - dataset_description = json.load(json_file) - self.dataset_name = dataset_description['Name'] - self.bids_version = dataset_description['BIDSVersion'] - except Exception: - print("WARNING: Cannot read dataset_description.json") - - # load BIDS candidates information - self.participants_info = self.load_candidates_from_bids() - - # load BIDS sessions information - self.cand_sessions_list = self.load_sessions_from_bids() - - # load BIDS modality information - self.cand_session_modalities_list = self.load_modalities_from_bids() - - def load_bids_data(self, validate): - """ - Loads the BIDS study using the BIDSLayout function (part of the pybids - package) and return the object. 
- - :return: bids structure - """ - - if self.verbose: - print('Loading the BIDS dataset with BIDS layout library...\n') - - exclude_arr = ['code/', 'sourcedata/', 'log/', '.git'] - force_arr = [re.compile(r"_annotations\.(tsv|json)$")] - - # BIDSLayoutIndexer is required for PyBIDS >= 0.12.1 - # bids_pack_version = list(map(int, bids.__version__.split('.'))) - # disabled until is a workaround for https://github.com/bids-standard/pybids/issues/760 is found - # [file] bids_import.py - # [function] read_and_insert_bids - # [line] for modality in row['modalities']: (row['modalities'] is empty) - # if (bids_pack_version[0] > 0 - # or bids_pack_version[1] > 12 - # or (bids_pack_version[1] == 12 and bids_pack_version[2] > 0)): - # bids_layout = BIDSLayout( - # root=self.bids_dir, - # indexer=BIDSLayoutIndexer(ignore=exclude_arr, force_index=force_arr) - # ) - # else: - bids_layout = BIDSLayout( - root=self.bids_dir, - ignore=exclude_arr, - force_index=force_arr, - derivatives=True, - validate=validate - ) - - if self.verbose: - print('\t=> BIDS dataset loaded with BIDS layout\n') - - return bids_layout - - def load_candidates_from_bids(self): - """ - Loads the list of candidates from the BIDS study. List of - participants and their information will be stored in participants_info. 
- - :return: list of dictionaries with participant information from BIDS - :rtype: list - """ - - if self.verbose: - print('Grepping candidates from the BIDS layout...') - - # grep the participant.tsv file and parse it - participants_info = None - for file in self.bids_layout.get(suffix='participants', return_type='filename'): - # note file[0] returns the path to participants.tsv - if 'participants.tsv' in file: - participants_info = utilities.read_tsv_file(file) - else: - continue - - if participants_info: - self.candidates_list_validation(participants_info) - else: - bids_subjects = self.bids_layout.get_subjects() - participants_info = [{'participant_id': sub_id} for sub_id in bids_subjects] - - if self.verbose: - print('\t=> List of participants found:') - for participant in participants_info: - print('\t\t' + participant['participant_id']) - print('\n') - - return participants_info - - def candidates_list_validation(self, participants_info): - """ - Validates whether the subjects listed in participants.tsv match the - list of participant directory. If there is a mismatch, will exit with - error code from lib.exitcode. 
- """ - - if self.verbose: - print('Validating the list of participants...') - - subjects = self.bids_layout.get_subjects() - - mismatch_message = ("\nERROR: Participant ID mismatch between " - "participants.tsv and raw data found in the BIDS " - "directory") - - # check that all subjects listed in participants_info are also in - # subjects array and vice versa - for row in participants_info: - # remove the "sub-" in front of the subject ID if present - row['participant_id'] = row['participant_id'].replace('sub-', '') - if row['participant_id'] not in subjects: - print(mismatch_message) - print(row['participant_id'] + 'is missing from the BIDS Layout') - print('List of subjects parsed by the BIDS layout: ' + ', '.join(subjects)) - sys.exit(lib.exitcode.BIDS_CANDIDATE_MISMATCH) - # remove the subject from the list of subjects - subjects.remove(row['participant_id']) - - # check that no subjects are left in subjects array - if subjects: - print(mismatch_message) - sys.exit(lib.exitcode.BIDS_CANDIDATE_MISMATCH) - - if self.verbose: - print('\t=> Passed validation of the list of participants\n') - - def load_sessions_from_bids(self): - """ - Grep the list of sessions for each candidate directly from the BIDS - structure. 
- - :return: dictionary with the list of sessions and candidates found in the - BIDS structure - :rtype: dict - """ - - if self.verbose: - print('Grepping list of sessions from the BIDS layout...') - - cand_sessions = {} - - for row in self.participants_info: - ses = self.bids_layout.get_sessions(subject=row['participant_id']) - cand_sessions[row['participant_id']] = ses - - if self.verbose: - print('\t=> List of sessions found:\n') - for candidate in cand_sessions: - if cand_sessions[candidate]: - print('\t\t' + candidate + ': ' + ', '.join(cand_sessions[candidate])) - else: - print('\t\tNo session found for candidate ' + candidate) - print('\n') - - return cand_sessions - - def load_modalities_from_bids(self): - """ - Grep the list of modalities available for each session and candidate directly - from the BIDS structure. - - :return: dictionary for candidate and session with list of modalities - :rtype: dict - """ - - if self.verbose: - print('Grepping the different modalities from the BIDS layout...') - - cand_session_modalities_list = [] - - for subject, visit_list in self.cand_sessions_list.items(): - if visit_list: - for visit in visit_list: - modalities = self.bids_layout.get_datatype(subject=subject, session=visit) - cand_session_modalities_list.append({ - 'bids_sub_id': subject, - 'bids_ses_id': visit, - 'modalities' : modalities - }) - else: - modalities = self.bids_layout.get_datatype(subject=subject) - cand_session_modalities_list.append({ - 'bids_sub_id': subject, - 'bids_ses_id': None, - 'modalities' : modalities - }) - - if self.verbose: - print('\t=> Done grepping the different modalities from the BIDS layout\n') - - return cand_session_modalities_list - - @staticmethod - def grep_file(files_list, match_pattern, derivative_pattern=None): - """ - Grep a unique file based on a match pattern and returns it. 
- - :param files_list : list of files to look into - :type files_list : list - :param match_pattern : pattern to use to find the file - :type match_pattern : str - :param derivative_pattern: derivative pattern to use if the file we look for - is a derivative file - :type derivative_pattern: str - - :return: name of the first file that matches the pattern - :rtype: str - """ - - for filename in files_list: - if not derivative_pattern: - if 'derivatives' in filename: - # skip all files with 'derivatives' string in their path - continue - elif re.search(match_pattern, filename): - # grep the file that matches the match_pattern (extension) - return filename - else: - matches_derivative = re.search(derivative_pattern, filename) - if re.search(match_pattern, filename) and matches_derivative: - return filename - - return None diff --git a/python/lib/candidate.py b/python/lib/candidate.py index bbb6981c6..8f617f466 100644 --- a/python/lib/candidate.py +++ b/python/lib/candidate.py @@ -1,11 +1,6 @@ """This class gather functions for candidate handling.""" import random -import sys - -from dateutil.parser import parse - -import lib.exitcode class Candidate: @@ -57,127 +52,6 @@ def __init__(self, verbose, psc_id=None, cand_id=None, sex=None, dob=None): self.center_id = None self.project_id = None - def create_candidate(self, db, participants_info): - """ - Creates a candidate using BIDS information provided in the - participants_info's list. 
- - :param db : database handler object - :type db : object - :param participants_info: list of dictionary with participants - information from BIDS - :type participants_info: list - - :return: dictionary with candidate info from the candidate's table - :rtype: dict - """ - - if not self.psc_id: - print("Cannot create a candidate without a PSCID.\n") - sys.exit(lib.exitcode.CANDIDATE_CREATION_FAILURE) - - if not self.cand_id: - self.cand_id = self.generate_cand_id(db) - - for row in participants_info: - if not row['participant_id'] == self.psc_id: - continue - self.grep_bids_dob(row) - if 'sex' in row: - self.map_sex(row['sex']) - if 'age' in row: - self.age = row['age'] - - # three steps to find site: - # 1. try matching full name from 'site' column in participants.tsv in db - # 2. try extracting alias from pscid - # 3. try finding previous site in candidate table - - if 'site' in row and row['site'].lower() not in ("null", ""): - # search site id in psc table by its full name - site_info = db.pselect( - "SELECT CenterID FROM psc WHERE Name = %s", - [row['site'], ] - ) - if len(site_info) > 0: - self.center_id = site_info[0]['CenterID'] - - if self.center_id is None: - # search site id in psc table by its alias extracted from pscid - db_sites = db.pselect("SELECT CenterID, Alias FROM psc") - for site in db_sites: - if site['Alias'] in row['participant_id']: - self.center_id = site['CenterID'] - - if self.center_id is None: - # try to find participant site in db - candidate_site_project = db.pselect( - "SELECT RegistrationCenterID FROM candidate WHERE pscid = %s", - [self.psc_id, ] - ) - if len(candidate_site_project) > 0: - self.center_id = candidate_site_project[0]['RegistrationCenterID'] - - # two steps to find project: - # 1. find full name in 'project' column in participants.tsv - # 2. 
find previous in candidate table - - if 'project' in row and row['project'].lower() not in ("null", ""): - # search project id in Project table by its full name - project_info = db.pselect( - "SELECT ProjectID FROM Project WHERE Name = %s", - [row['project'], ] - ) - if len(project_info) > 0: - self.project_id = project_info[0]['ProjectID'] - - if self.project_id is None: - # try to find participant project - candidate_site_project = db.pselect( - "SELECT RegistrationProjectID FROM candidate WHERE pscid = %s", - [self.psc_id, ] - ) - if len(candidate_site_project) > 0: - self.center_id = candidate_site_project[0]['RegistrationProjectID'] - - if not self.center_id: - print("ERROR: could not determine site for " + self.psc_id + "." - + " Please check that your psc table contains a site with an" - + " alias matching the BIDS participant_id or a name matching the site mentioned in" - + " participants.tsv's site column") - sys.exit(lib.exitcode.PROJECT_CUSTOMIZATION_FAILURE) - - if not self.project_id: - print("ERROR: could not determine project for " + self.psc_id + "." 
- + " Please check that your project table contains a project with a" - + " name matching the participants.tsv's project column") - sys.exit(lib.exitcode.PROJECT_CUSTOMIZATION_FAILURE) - - if self.verbose: - print("Creating candidate with \n" - + "PSCID = " + self.psc_id + ",\n" - + "CandID = " + str(self.cand_id) + ",\n" - + "CenterID = " + str(self.center_id) + ",\n" - + "ProjectID = " + str(self.project_id)) - - insert_col = ('PSCID', 'CandID', 'RegistrationCenterID', 'RegistrationProjectID') - insert_val = (self.psc_id, str(self.cand_id), str(self.center_id), str(self.project_id)) - - if self.sex: - insert_col = (*insert_col, 'Sex') - insert_val = (*insert_val, self.sex) - if self.dob: - insert_col = (*insert_col, 'DoB') - insert_val = (*insert_val, self.dob) - - db.insert( - table_name='candidate', - column_names=insert_col, - values=insert_val - ) - - return self.get_candidate_info_from_loris(db) - def get_candidate_info_from_loris(self, db): """ Grep candidate information from the candidate table using the PSCID or CandID. 
@@ -218,22 +92,6 @@ def map_sex(self, sex): if sex.lower() in ('f', 'female'): self.sex = 'Female' - def grep_bids_dob(self, subject_info): - """ - Greps the date of birth from the BIDS structure and add it to self.dob which - will be inserted into the DoB field of the candidate table - - :param subject_info: dictionary with all information present in the BIDS - participants.tsv file for a given candidate - :type subject_info: dict - """ - - dob_names = ['date_of_birth', 'birth_date', 'dob'] - for name in dob_names: - if name in subject_info: - dob = parse(subject_info[name]) - self.dob = dob.strftime('%Y-%m-%d') - @staticmethod def generate_cand_id(db): """ diff --git a/python/lib/config.py b/python/lib/config.py index e011164bc..e658638e8 100644 --- a/python/lib/config.py +++ b/python/lib/config.py @@ -26,6 +26,15 @@ def get_patient_id_dicom_header_config(env: Env) -> Literal['PatientID', 'Patien return patient_id_dicom_header +def get_default_bids_visit_label_config(env: Env) -> str: + """ + Get the default BIDS visit label from the in-database configuration, or exit the program with + an error if that configuration value does not exist. 
+ """ + + return _get_config_value(env, 'default_bids_vl') + + def get_data_dir_path_config(env: Env) -> str: """ Get the LORIS base data directory path from the in-database configuration, or exit the program diff --git a/python/lib/dcm2bids_imaging_pipeline_lib/nifti_insertion_pipeline.py b/python/lib/dcm2bids_imaging_pipeline_lib/nifti_insertion_pipeline.py index 5a241623f..80c58e7ec 100644 --- a/python/lib/dcm2bids_imaging_pipeline_lib/nifti_insertion_pipeline.py +++ b/python/lib/dcm2bids_imaging_pipeline_lib/nifti_insertion_pipeline.py @@ -7,11 +7,11 @@ import sys import lib.exitcode -from lib.bids import get_bids_json_session_info from lib.db.queries.dicom_archive import try_get_dicom_archive_series_with_series_uid_echo_time from lib.dcm2bids_imaging_pipeline_lib.base_pipeline import BasePipeline from lib.get_session_info import SessionConfigError, get_dicom_archive_session_info -from lib.imaging_lib.nifti import add_nifti_spatial_file_parameters +from lib.imaging_lib.bids.json import get_bids_json_session_info +from lib.imaging_lib.nifti import add_nifti_file_parameters from lib.logging import log_error_exit, log_verbose from lib.util.crypto import compute_file_blake2b_hash, compute_file_md5_hash @@ -74,7 +74,7 @@ def __init__(self, loris_getopt_obj, script_name): # Load the JSON file object with scan parameters if a JSON file was provided # --------------------------------------------------------------------------------------------- self.json_file_dict = self._load_json_sidecar_file() - add_nifti_spatial_file_parameters(self.nifti_path, self.json_file_dict) + add_nifti_file_parameters(self.nifti_path, self.nifti_blake2, self.json_file_dict) # --------------------------------------------------------------------------------- # Determine subject IDs based on DICOM headers and validate the IDs against the DB @@ -560,7 +560,6 @@ def _create_destination_dir_and_move_image_files(self, destination): self.move_file(original_file_path, new_file_path) if destination == 
'assembly_bids': - self.json_file_dict['file_blake2b_hash'] = self.nifti_blake2 if self.json_path: self.json_file_dict['bids_json_file'] = json_rel_path self.json_file_dict['bids_json_file_blake2b_hash'] = self.json_blake2 diff --git a/python/lib/eeg.py b/python/lib/eeg.py index 5c08b05e1..ca7ca7d72 100644 --- a/python/lib/eeg.py +++ b/python/lib/eeg.py @@ -3,19 +3,19 @@ import getpass import json import os -import sys +from typing import Any, Literal import lib.exitcode import lib.utilities as utilities -from lib.candidate import Candidate +from lib.database import Database from lib.database_lib.config import Config from lib.database_lib.physiological_event_archive import PhysiologicalEventArchive from lib.database_lib.physiological_event_file import PhysiologicalEventFile from lib.database_lib.physiological_modality import PhysiologicalModality from lib.database_lib.physiological_output_type import PhysiologicalOutputType +from lib.db.models.session import DbSession +from lib.imaging_lib.bids.eeg.dataset import BIDSEEGDataType from lib.physiological import Physiological -from lib.scanstsv import ScansTSV -from lib.session import Session from lib.util.crypto import compute_file_blake2b_hash @@ -23,97 +23,33 @@ class Eeg: """ This class reads the BIDS EEG data structure and register the EEG datasets into the database by calling the lib.physiological class. 
- - :Example: - - from lib.bidsreader import BidsReader - from lib.eeg import Eeg - from lib.database import Database - from lib.database_lib.config import Config - - # database connection - db = Database(config_file.mysql, verbose) - db.connect() - - # grep config settings from the Config module - config_obj = Config(db, verbose) - default_bids_vl = config_obj.get_config('default_bids_vl') - data_dir = config_obj.get_config('dataDirBasepath') - - # load the BIDS directory - bids_reader = BidsReader(bids_dir) - - # create the LORIS_BIDS directory in data_dir based on Name and BIDS version - loris_bids_root_dir = create_loris_bids_directory( - bids_reader, data_dir, verbose - ) - for row in bids_reader.cand_session_modalities_list: - for modality in row['modalities']: - if modality == 'eeg': - bids_session = row['bids_ses_id'] - visit_label = bids_session if bids_session else default_bids_vl - loris_bids_eeg_rel_dir = "sub-" + row['bids_sub_id'] + "/" + \ - "ses-" + visit_label + "/eeg/" - lib.utilities.create_dir( - loris_bids_root_dir + loris_bids_eeg_rel_dir, verbose - ) - Eeg( - bids_reader = bids_reader, - bids_sub_id = row['bids_sub_id'], - bids_ses_id = row['bids_ses_id'], - bids_modality = modality, - db = db, - verbose = verbose, - data_dir = data_dir, - default_visit_label = default_bids_vl, - loris_bids_eeg_rel_dir = loris_bids_eeg_rel_dir, - loris_bids_root_dir = loris_bids_root_dir, - dataset_tag_dict = dataset_tag_dict - ) - - # disconnect from the database - db.disconnect() """ - def __init__(self, bids_reader, bids_sub_id, bids_ses_id, bids_modality, db, - verbose, data_dir, default_visit_label, loris_bids_eeg_rel_dir, - loris_bids_root_dir, dataset_tag_dict, dataset_type): + def __init__( + self, data_type: BIDSEEGDataType, session: DbSession, db: Database, verbose: bool, data_dir: str, + loris_bids_eeg_rel_dir: str, loris_bids_root_dir: str | None, dataset_tag_dict: dict[Any, Any], + dataset_type: Literal['raw', 'derivative'] | None, + ): """ 
Constructor method for the Eeg class. - :param bids_reader : dictionary with BIDS reader information - :type bids_reader : dict - :param bids_sub_id : BIDS subject ID (that will be used as PSCID) - :type bids_sub_id : str - :param bids_ses_id : BIDS session ID (that will be used for the visit label) - :type bids_ses_id : str - :param bids_modality: BIDS modality (a.k.a. EEG) - :tyoe bids_modality: str + :param data_type : The BIDS data type object. + :param session : The session database object. :param db : Database class object - :type db : object :param verbose : whether to be verbose - :type verbose : bool :param data_dir : LORIS data directory path (usually /data/PROJECT/data) - :type data_dir : str - :param default_visit_label : default visit label to be used if no BIDS - session are present in the BIDS structure - :type default_visit_label : str :param loris_bids_eeg_rel_dir: LORIS BIDS EEG relative dir path to data_dir - :type loris_bids_eeg_rel_dir: str :param loris_bids_root_dir : LORIS BIDS root directory path - :type loris_bids_root_dir : str :param dataset_tag_dict : Dict of dataset-inherited HED tags - :type dataset_tag_dict : dict :param dataset_type : raw | derivative. Type of the dataset - :type dataset_type : string """ # config self.config_db_obj = Config(db, verbose) # load bids objects - self.bids_reader = bids_reader - self.bids_layout = bids_reader.bids_layout + self.data_type = data_type + self.bids_layout = data_type.root_dataset.layout # load the LORIS BIDS import root directory where the eeg files will # be copied @@ -121,11 +57,6 @@ def __init__(self, bids_reader, bids_sub_id, bids_ses_id, bids_modality, db, self.loris_bids_root_dir = loris_bids_root_dir self.data_dir = data_dir - # load bids subject, visit and modality - self.bids_sub_id = bids_sub_id - self.bids_ses_id = bids_ses_id - self.bids_modality = bids_modality - # load dataset tag dict. 
Used to ensure HED tags aren't duplicated self.dataset_tag_dict = dataset_tag_dict @@ -134,35 +65,19 @@ def __init__(self, bids_reader, bids_sub_id, bids_ses_id, bids_modality, db, self.verbose = verbose # find corresponding CandID and SessionID in LORIS - self.loris_cand_info = self.get_loris_cand_info() - self.default_vl = default_visit_label - self.psc_id = self.loris_cand_info['PSCID'] - self.cand_id = self.loris_cand_info['CandID'] - self.center_id = self.loris_cand_info['RegistrationCenterID'] - self.project_id = self.loris_cand_info['RegistrationProjectID'] + self.session = session hed_query = 'SELECT * FROM hed_schema_nodes WHERE 1' self.hed_union = self.db.pselect(query=hed_query, args=()) - self.cohort_id = None - for row in bids_reader.participants_info: - if not row['participant_id'] == self.bids_sub_id: - continue - if 'cohort' in row: - cohort_info = db.pselect( - "SELECT CohortID FROM cohort WHERE title = %s", - [row['cohort'], ] - ) - if len(cohort_info) > 0: - self.cohort_id = cohort_info[0]['CohortID'] - break - - self.session_id = self.get_loris_session_id() - # check if a tsv with acquisition dates or age is available for the subject self.scans_file = None - if self.bids_layout.get(suffix='scans', subject=self.bids_sub_id, return_type='filename'): - self.scans_file = self.bids_layout.get(suffix='scans', subject=self.bids_sub_id, return_type='filename')[0] + if self.bids_layout.get(suffix='scans', subject=self.data_type.subject.label, return_type='filename'): + self.scans_file = self.bids_layout.get( + suffix='scans', + subject=self.data_type.subject.label, + return_type='filename' + )[0] # register the data into LORIS if (dataset_type and dataset_type == 'raw'): @@ -173,59 +88,6 @@ def __init__(self, bids_reader, bids_sub_id, bids_ses_id, bids_modality, db, self.register_data() self.register_data(derivatives=True) - def get_loris_cand_info(self): - """ - Gets the LORIS Candidate info for the BIDS subject. 
- - :return: Candidate info of the subject found in the database - :rtype: list - """ - - candidate = Candidate(verbose=self.verbose, cand_id=self.bids_sub_id) - loris_cand_info = candidate.get_candidate_info_from_loris(self.db) - - if not loris_cand_info: - candidate = Candidate(verbose=self.verbose, psc_id=self.bids_sub_id) - loris_cand_info = candidate.get_candidate_info_from_loris(self.db) - - if not loris_cand_info: - print("Candidate " + self.bids_sub_id + " not found. You can retry with the --createcandidate option.\n") - sys.exit(lib.exitcode.CANDIDATE_NOT_FOUND) - - return loris_cand_info - - def get_loris_session_id(self): - """ - Greps the LORIS session.ID corresponding to the BIDS visit. Note, - if no BIDS visit are set, will use the default visit label value set - in the config module - - :return: the session's ID in LORIS - :rtype: int - """ - - # check if there are any visit label in BIDS structure, if not, - # will use the default visit label set in the config module - visit_label = self.bids_ses_id if self.bids_ses_id else self.default_vl - - session = Session( - self.db, self.verbose, self.cand_id, visit_label, - self.center_id, self.project_id, self.cohort_id - ) - loris_vl_info = session.get_session_info_from_loris() - - if not loris_vl_info: - message = "ERROR: visit label " + visit_label + " does not exist in " + \ - "the session table for candidate " + str(self.cand_id) + \ - "\nPlease make sure the visit label is created in the " + \ - "database or run bids_import.py with the -s option -s if " + \ - "you wish that the insertion pipeline creates the visit " + \ - "label in the session table." 
- print(message) - exit(lib.exitcode.SELECT_FAILURE) - - return loris_vl_info['ID'] - def grep_bids_files(self, bids_type): """ Greps the BIDS files and their layout information from the BIDSLayout @@ -239,18 +101,18 @@ def grep_bids_files(self, bids_type): :rtype: list """ - if self.bids_ses_id: + if self.data_type.session.label: return self.bids_layout.get( - subject = self.bids_sub_id, - session = self.bids_ses_id, - datatype = self.bids_modality, + subject = self.data_type.subject.label, + session = self.data_type.session.label, + datatype = self.data_type.name, suffix = bids_type, return_type = 'filename' ) else: return self.bids_layout.get( - subject = self.bids_sub_id, - datatype = self.bids_modality, + subject = self.data_type.subject.label, + datatype = self.data_type.name, suffix = bids_type, return_type = 'filename' ) @@ -310,21 +172,21 @@ def register_data(self, derivatives=False, detect=True): ) # archive all files in a tar ball for downloading all files at once - files_to_archive = (os.path.join(self.data_dir, eeg_file_path),) + files_to_archive: list[str] = [os.path.join(self.data_dir, eeg_file_path)] if eegjson_file_path: - files_to_archive = (*files_to_archive, os.path.join(self.data_dir, eegjson_file_path)) + files_to_archive.append(os.path.join(self.data_dir, eegjson_file_path)) if fdt_file_path: - files_to_archive = (*files_to_archive, os.path.join(self.data_dir, fdt_file_path)) + files_to_archive.append(os.path.join(self.data_dir, fdt_file_path)) if electrode_file_path: - files_to_archive = (*files_to_archive, os.path.join(self.data_dir, electrode_file_path)) + files_to_archive.append(os.path.join(self.data_dir, electrode_file_path)) if event_file_paths: # archive all event files in a tar ball for event download - event_files_to_archive = () + event_files_to_archive: list[str] = [] for event_file_path in event_file_paths: - files_to_archive = (*files_to_archive, os.path.join(self.data_dir, event_file_path)) - event_files_to_archive = 
(*event_files_to_archive, os.path.join(self.data_dir, event_file_path)) + files_to_archive.append(os.path.join(self.data_dir, event_file_path)) + event_files_to_archive.append(os.path.join(self.data_dir, event_file_path)) event_archive_rel_name = os.path.splitext(event_file_paths[0])[0] + ".tgz" self.create_and_insert_event_archive( @@ -332,7 +194,7 @@ def register_data(self, derivatives=False, detect=True): ) if channel_file_path: - files_to_archive = (*files_to_archive, os.path.join(self.data_dir, channel_file_path)) + files_to_archive.append(os.path.join(self.data_dir, channel_file_path)) archive_rel_name = os.path.splitext(eeg_file_path)[0] + ".tgz" self.create_and_insert_archive( @@ -371,17 +233,17 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True): if detect: # TODO if derivatives, grep the source file as well as the input file ID??? eeg_files = self.bids_layout.get( - subject = self.bids_sub_id, - session = self.bids_ses_id, + subject = self.data_type.subject.label, + session = self.data_type.session.label, scope = 'derivatives' if derivatives else 'raw', - suffix = self.bids_modality, + suffix = self.data_type.name, extension = ['set', 'edf', 'vhdr', 'vmrk', 'eeg', 'bdf'] ) else: eeg_files = self.bids_layout.get( - subject = self.bids_sub_id, - session = self.bids_ses_id, - suffix = self.bids_modality, + subject = self.data_type.subject.label, + session = self.data_type.session.label, + suffix = self.data_type.name, extension = ['set', 'edf', 'vhdr', 'vmrk', 'eeg', 'bdf'] ) @@ -395,7 +257,7 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True): return_type = 'tuple', strict=False, extension = 'json', - suffix = self.bids_modality, + suffix = self.data_type.name, all_ = False, full_search = False, ) @@ -438,15 +300,14 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True): # get the acquisition date of the EEG file or the age at the time of the EEG recording eeg_acq_time = None if self.scans_file: - 
scan_info = ScansTSV(self.scans_file, eeg_file.path, self.verbose) - eeg_acq_time = scan_info.get_acquisition_time() - eeg_file_data['age_at_scan'] = scan_info.get_age_at_scan() + tsv_scan = self.data_type.session.get_tsv_scan(os.path.basename(self.scans_file)) + + eeg_acq_time = tsv_scan.acquisition_time + eeg_file_data['age_at_scan'] = tsv_scan.age_at_scan if self.loris_bids_root_dir: # copy the scans.tsv file to the LORIS BIDS import directory - scans_path = scan_info.copy_scans_tsv_file_to_loris_bids_dir( - self.bids_sub_id, self.loris_bids_root_dir, self.data_dir - ) + scans_path = self.copy_scans_tsv_file_to_loris_bids_dir() eeg_file_data['scans_tsv_file'] = scans_path scans_blake2 = compute_file_blake2b_hash(self.scans_file) @@ -481,7 +342,7 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True): if not physio_file_id: # grep the modality ID from physiological_modality table - modality_id = physiological_modality.grep_id_from_modality_value(self.bids_modality) + modality_id = physiological_modality.grep_id_from_modality_value(self.data_type.name) eeg_path = eeg_file.path.replace(self.data_dir, '') if self.loris_bids_root_dir: @@ -495,7 +356,7 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True): eeg_file_info = { 'FileType': file_type, 'FilePath': eeg_path, - 'SessionID': self.session_id, + 'SessionID': self.session.id, 'AcquisitionTime': eeg_acq_time, 'InsertedByUser': getpass.getuser(), 'PhysiologicalOutputTypeID': output_type_id, @@ -601,7 +462,7 @@ def fetch_and_insert_electrode_file( suffix = 'coordsystem', all_ = False, full_search = False, - subject=self.bids_sub_id, + subject=self.data_type.subject.label, ) if not coordsystem_metadata_file: message = '\nWARNING: no electrode metadata files (coordsystem.json) ' \ @@ -762,7 +623,7 @@ def fetch_and_insert_event_files( suffix = 'events', all_ = False, full_search = False, - subject=self.bids_sub_id, + subject=self.data_type.subject.label, ) inheritance = False @@ 
-787,7 +648,7 @@ def fetch_and_insert_event_files( event_metadata=event_metadata, event_metadata_file=event_metadata_path, physiological_file_id=physiological_file_id, - project_id=self.project_id, + project_id=self.session.project_id, blake2=blake2, project_wide=False, hed_union=self.hed_union @@ -810,7 +671,7 @@ def fetch_and_insert_event_files( event_data=event_data, event_file=event_path, physiological_file_id=physiological_file_id, - project_id=self.project_id, + project_id=self.session.project_id, blake2=blake2, dataset_tag_dict=self.dataset_tag_dict, file_tag_dict=file_tag_dict, @@ -845,7 +706,8 @@ def copy_file_to_loris_bids_dir(self, file, derivatives=False, inheritance=False self.bids_layout.root, "" ) - copy_file = self.loris_bids_root_dir + copy_file + + copy_file = os.path.join(self.loris_bids_root_dir, copy_file) # create derivative directories lib.utilities.create_dir( @@ -857,17 +719,18 @@ def copy_file_to_loris_bids_dir(self, file, derivatives=False, inheritance=False copy_file = "" if not inheritance: copy_file = self.loris_bids_eeg_rel_dir - if self.bids_ses_id: + if self.data_type.session.label: copy_file += os.path.basename(file) else: # make sure the ses- is included in the new filename if using # default visit label from the LORIS config copy_file += str.replace( os.path.basename(file), - "sub-" + self.bids_sub_id, - "sub-" + self.bids_sub_id + "_ses-" + self.default_vl + "sub-" + self.data_type.subject.label, + "sub-" + self.data_type.subject.label + "_ses-" + self.default_vl ) - copy_file = self.loris_bids_root_dir + copy_file + + copy_file = os.path.join(self.loris_bids_root_dir, copy_file) # copy the file utilities.copy_file(file, copy_file, self.verbose) @@ -877,17 +740,13 @@ def copy_file_to_loris_bids_dir(self, file, derivatives=False, inheritance=False return relative_path - def create_and_insert_archive(self, files_to_archive, archive_rel_name, - eeg_file_id): + def create_and_insert_archive(self, files_to_archive: list[str], 
archive_rel_name: str, eeg_file_id): """ Create an archive with all electrophysiology files associated to a specific recording (including electrodes.tsv, channels.tsv etc...) - :param files_to_archive: tuple with the list of files to include in - the archive - :type files_to_archive: tuple + :param files_to_archive: list of files to include in the archive :param archive_rel_name: path to the archive relative to data_dir - :type archive_rel_name: str :param eeg_file_id : PhysiologicalFileID :type eeg_file_id : int """ @@ -935,15 +794,12 @@ def create_and_insert_archive(self, files_to_archive, archive_rel_name, } physiological.insert_archive_file(archive_info) - def create_and_insert_event_archive(self, files_to_archive, archive_rel_name, eeg_file_id): + def create_and_insert_event_archive(self, files_to_archive: list[str], archive_rel_name: str, eeg_file_id): """ Create an archive with all event files associated to a specific recording - :param files_to_archive: tuple with the list of files to include in - the archive - :type files_to_archive: tuple + :param files_to_archive: list of files to include in the archive :param archive_rel_name: path to the archive relative to data_dir - :type archive_rel_name: str :param eeg_file_id : PhysiologicalFileID :type eeg_file_id : int """ diff --git a/python/lib/imaging_lib/bids/dataset.py b/python/lib/imaging_lib/bids/dataset.py new file mode 100644 index 000000000..c4d457ccb --- /dev/null +++ b/python/lib/imaging_lib/bids/dataset.py @@ -0,0 +1,315 @@ +import re +from collections.abc import Iterator +from functools import cached_property +from pathlib import Path +from typing import TYPE_CHECKING + +from bids import BIDSLayout + +from lib.imaging_lib.bids.dataset_description import BidsDatasetDescription +from lib.imaging_lib.bids.tsv_participants import BidsTsvParticipant, read_bids_participants_tsv_file +from lib.imaging_lib.bids.tsv_scans import BidsTsvScan, read_bids_scans_tsv_file +from lib.util.fs import 
PYBIDS_IGNORE = ['code', 'sourcedata', 'log', '.git']

PYBIDS_FORCE = [re.compile(r"_annotations\.(tsv|json)$")]


class BIDSDataset:
    """
    A BIDS dataset on disk, explored lazily through its subject, session and data type
    directories.
    """

    path: Path
    validate: bool

    def __init__(self, bids_path: Path, validate: bool):
        """
        :param bids_path: Path of the BIDS dataset root directory.
        :param validate : Whether to run the PyBIDS validation when building the layout.
        """

        self.path = bids_path
        self.validate = validate

    @property
    def sessions(self) -> Iterator['BIDSSession']:
        """
        All the session directories found in the BIDS dataset.
        """

        for subject in self.subjects:
            yield from subject.sessions

    @property
    def data_types(self) -> Iterator['BIDSDataType']:
        """
        All the data type directories found in the BIDS dataset.
        """

        for session in self.sessions:
            yield from session.data_types

    @property
    def niftis(self) -> Iterator['BIDSMRIAcquisition']:
        """
        All the NIfTI acquisitions found in the BIDS dataset.
        """

        # Local import to avoid a circular dependency (the MRI module imports this one).
        from lib.imaging_lib.bids.mri.dataset import BIDSMRIDataType
        for data_type in self.data_types:
            if isinstance(data_type, BIDSMRIDataType):
                yield from data_type.niftis

    @cached_property
    def subjects(self) -> list['BIDSSubject']:
        """
        The subject directories found in the BIDS dataset.
        """

        subjects: list[BIDSSubject] = []

        for file in self.path.iterdir():
            subject_match = re.match(r'sub-([a-zA-Z0-9]+)', file.name)
            if subject_match is None:
                continue

            # Ignore plain files that happen to start with `sub-`.
            if not file.is_dir():
                continue

            subject_label = subject_match.group(1)
            subjects.append(BIDSSubject(self, subject_label))

        return subjects

    def get_dataset_description(self) -> 'BidsDatasetDescription | None':
        """
        Read the BIDS dataset description file of this BIDS dataset. Return `None` if no dataset
        description file is present in the dataset, or raise an exception if the file is present
        but contains incorrect data.
        """

        dataset_description_path = self.path / 'dataset_description.json'
        if not dataset_description_path.exists():
            return None

        return BidsDatasetDescription(dataset_description_path)

    @cached_property
    def tsv_participants(self) -> dict[str, BidsTsvParticipant] | None:
        """
        The set of participants in the 'participants.tsv' file of this BIDS dataset if it is
        present. This property might raise an exception if the file is present but incorrect.
        """

        tsv_participants_path = self.path / 'participants.tsv'
        if not tsv_participants_path.exists():
            return None

        return read_bids_participants_tsv_file(tsv_participants_path)

    @cached_property
    def subject_labels(self) -> list[str]:
        """
        All the subject labels found in the BIDS dataset, sorted and deduplicated.
        """

        return sorted({subject.label for subject in self.subjects})

    @cached_property
    def session_labels(self) -> list[str]:
        """
        All the session labels found in this BIDS dataset, sorted and deduplicated.
        """

        return sorted({session.label for session in self.sessions if session.label is not None})

    def get_subject(self, subject_label: str) -> 'BIDSSubject | None':
        """
        Get the subject directory corresponding to a subject label in this BIDS dataset or `None`
        if it does not exist.
        """

        return find(lambda subject: subject.label == subject_label, self.subjects)

    def get_tsv_participant(self, participant_id: str) -> 'BidsTsvParticipant | None':
        """
        Get the `participants.tsv` record corresponding to a participant ID in this BIDS dataset
        or `None` if it does not exist.
        """

        if self.tsv_participants is None:
            return None

        return self.tsv_participants.get(participant_id)

    @cached_property
    def layout(self) -> BIDSLayout:
        """
        Get the PyBIDS BIDSLayout for the BIDS dataset.
        """

        return BIDSLayout(
            root        = self.path,
            ignore      = PYBIDS_IGNORE,
            force_index = PYBIDS_FORCE,
            derivatives = True,
            validate    = self.validate
        )


class BIDSSubject:
    """
    A `sub-*` subject directory of a BIDS dataset.
    """

    root_dataset: BIDSDataset
    path: Path
    label: str

    def __init__(self, root_dataset: BIDSDataset, label: str):
        self.root_dataset = root_dataset
        self.label = label
        self.path = self.root_dataset.path / f'sub-{self.label}'

    @property
    def data_types(self) -> Iterator['BIDSDataType']:
        """
        All the data type directories found in this subject directory.
        """

        for session in self.sessions:
            yield from session.data_types

    @property
    def niftis(self) -> Iterator['BIDSMRIAcquisition']:
        """
        All the NIfTI acquisitions found in this subject directory.
        """

        # Local import to avoid a circular dependency (the MRI module imports this one).
        from lib.imaging_lib.bids.mri.dataset import BIDSMRIDataType
        for data_type in self.data_types:
            if isinstance(data_type, BIDSMRIDataType):
                yield from data_type.niftis

    @cached_property
    def sessions(self) -> list['BIDSSession']:
        """
        The session directories found in this subject directory. If the subject has no `ses-*`
        directory, a single label-less session mapped onto the subject directory is returned.
        """

        sessions: list[BIDSSession] = []

        for file in self.path.iterdir():
            if not file.is_dir():
                continue

            session_match = re.match(r'ses-([a-zA-Z0-9]+)', file.name)
            if session_match is None:
                continue

            session_label = session_match.group(1)
            sessions.append(BIDSSession(self, session_label))

        if sessions == []:
            sessions.append(BIDSSession(self, None))

        return sessions

    def get_session(self, session_label: str) -> 'BIDSSession | None':
        """
        Get a session directory of this subject directory or `None` if it does not exist.
        """

        return find(lambda session: session.label == session_label, self.sessions)


class BIDSSession:
    """
    A `ses-*` session directory of a BIDS dataset (or the subject directory itself for datasets
    without session directories, in which case `label` is `None`).
    """

    subject: BIDSSubject
    path: Path
    label: str | None
    tsv_scans_path: Path | None

    def __init__(self, subject: BIDSSubject, label: str | None):
        self.subject = subject
        self.label = label
        if label is not None:
            self.path = subject.path / f'ses-{self.label}'
        else:
            self.path = subject.path

        # The dot is escaped so that only real `*scans.tsv` file names match.
        self.tsv_scans_path = search_dir_file_with_regex(self.path, r'scans\.tsv$')

    @property
    def root_dataset(self) -> BIDSDataset:
        return self.subject.root_dataset

    @property
    def niftis(self) -> Iterator['BIDSMRIAcquisition']:
        """
        All the NIfTI acquisitions found in this session directory.
        """

        for data_type in self.mri_data_types:
            yield from data_type.niftis

    @cached_property
    def mri_data_types(self) -> list['BIDSMRIDataType']:
        """
        The MRI data type directories found in this session directory.
        """

        from lib.imaging_lib.bids.mri.dataset import BIDSMRIDataType

        data_types: list[BIDSMRIDataType] = []

        for data_type_name in ['anat', 'dwi', 'fmap', 'func']:
            data_type_path = self.path / data_type_name
            if data_type_path.is_dir():
                data_types.append(BIDSMRIDataType(self, data_type_name))

        return data_types

    @cached_property
    def eeg_data_types(self) -> list['BIDSEEGDataType']:
        """
        The EEG data type directories found in this session directory.
        """

        from lib.imaging_lib.bids.eeg.dataset import BIDSEEGDataType

        data_types: list[BIDSEEGDataType] = []

        for data_type_name in ['eeg', 'ieeg']:
            data_type_path = self.path / data_type_name
            if data_type_path.is_dir():
                data_types.append(BIDSEEGDataType(self, data_type_name))

        return data_types

    @property
    def data_types(self) -> Iterator['BIDSDataType']:
        """
        The data type directories found in this session directory.
        """

        yield from self.mri_data_types
        yield from self.eeg_data_types

    @cached_property
    def tsv_scans(self) -> dict[str, BidsTsvScan] | None:
        """
        The set of scans in the 'scans.tsv' file of this BIDS directory if it is present. This
        property might raise an exception if the file is present but incorrect.
        """

        if self.tsv_scans_path is None:
            return None

        return read_bids_scans_tsv_file(self.tsv_scans_path)

    def get_tsv_scan(self, file_name: str) -> 'BidsTsvScan | None':
        """
        Get the `scans.tsv` record corresponding to a file name of this session directory or `None`
        if it does not exist.
        """

        if self.tsv_scans is None:
            return None

        return self.tsv_scans.get(file_name)


class BIDSDataType:
    """
    A data type directory (`anat`, `func`, `eeg`...) of a BIDS session.
    """

    session: BIDSSession
    path: Path

    def __init__(self, session: BIDSSession, name: str):
        self.session = session
        self.path = session.path / name

    @property
    def name(self) -> str:
        return self.path.name

    @property
    def root_dataset(self) -> BIDSDataset:
        return self.session.root_dataset

    @property
    def subject(self) -> BIDSSubject:
        return self.session.subject
+ """ + + def __init__(self, dataset_descrption_path: Path): + """ + Read a BIDS dataset description file, or raise an exception if that file contains incorrect + data. + """ + + with open(dataset_descrption_path) as dataset_description_file: + try: + self.json = json.load(dataset_description_file) + except ValueError: + raise BidsDatasetDescriptionError("The BIDS dataset description file does not contain valid JSON.") + + try: + self.name = self.json["Name"] + except ValueError: + raise BidsDatasetDescriptionError("Missing property 'Name' in the BIDS dataset description file.") + + try: + self.bids_version = self.json["BIDSVersion"] + except ValueError: + raise BidsDatasetDescriptionError("Missing property 'BIDSVersion' in the BIDS dataset description file.") diff --git a/python/lib/imaging_lib/bids/eeg/dataset.py b/python/lib/imaging_lib/bids/eeg/dataset.py new file mode 100644 index 000000000..c3f4227d5 --- /dev/null +++ b/python/lib/imaging_lib/bids/eeg/dataset.py @@ -0,0 +1,6 @@ +# TODO: Complete with EEG-specific content. 
+from lib.imaging_lib.bids.dataset import BIDSDataType + + +class BIDSEEGDataType(BIDSDataType): + pass diff --git a/python/lib/bids.py b/python/lib/imaging_lib/bids/json.py similarity index 63% rename from python/lib/bids.py rename to python/lib/imaging_lib/bids/json.py index fe616d42d..b04e57f4d 100644 --- a/python/lib/bids.py +++ b/python/lib/imaging_lib/bids/json.py @@ -1,9 +1,13 @@ +import json +from pathlib import Path from typing import Any from lib.config import get_patient_id_dicom_header_config from lib.env import Env from lib.get_session_info import SessionInfo, get_session_info from lib.imaging_lib.mri_scanner import MriScannerInfo +from lib.import_bids_dataset.imaging import map_bids_param_to_loris_param +from lib.util.crypto import compute_file_blake2b_hash def get_bids_json_scanner_info(bids_json: dict[str, Any]) -> MriScannerInfo: @@ -36,3 +40,18 @@ def get_bids_json_session_info(env: Env, bids_json: dict[str, Any]) -> SessionIn scanner_info = get_bids_json_scanner_info(bids_json) return get_session_info(env, patient_id, scanner_info) + + +def add_bids_json_file_parameters(env: Env, bids_json_path: Path, rel_json_path: Path, file_parameters: dict[str, Any]): + """ + Read a BIDS JSON sidecar file and add its parameters to a LORIS file parameters dictionary. 
+ """ + + with open(bids_json_path) as data_file: + file_parameters.update(json.load(data_file)) + map_bids_param_to_loris_param(env, file_parameters) + + json_blake2 = compute_file_blake2b_hash(bids_json_path) + + file_parameters['bids_json_file'] = str(rel_json_path) + file_parameters['bids_json_file_blake2b_hash'] = json_blake2 diff --git a/python/lib/imaging_lib/bids/mri/dataset.py b/python/lib/imaging_lib/bids/mri/dataset.py new file mode 100644 index 000000000..2b16cb33d --- /dev/null +++ b/python/lib/imaging_lib/bids/mri/dataset.py @@ -0,0 +1,65 @@ +import re +from functools import cached_property +from pathlib import Path + +from lib.imaging_lib.bids.dataset import BIDSDataset, BIDSDataType, BIDSSession, BIDSSubject +from lib.util.fs import remove_path_extension, replace_path_extension + + +class BIDSMRIDataType(BIDSDataType): + @cached_property + def niftis(self) -> list['BIDSMRIAcquisition']: + """ + The NIfTI files found in this MRI data type directory. + """ + + acquisitions: list[BIDSMRIAcquisition] = [] + + for file_path in self.path.iterdir(): + if file_path.name.endswith(('.nii', '.nii.gz')): + acquisitions.append(BIDSMRIAcquisition(self, file_path)) + + return acquisitions + + +class BIDSMRIAcquisition: + data_type: BIDSDataType + path: Path + nifti_path: Path + sidecar_path: Path | None + bval_path: Path | None + bvec_path: Path | None + suffix: str | None + + def __init__(self, data_type: BIDSDataType, nifti_path: Path): + self.data_type = data_type + self.path = remove_path_extension(nifti_path) + self.nifti_path = data_type.path / nifti_path + + sidecar_path = replace_path_extension(self.path, 'json') + self.sidecar_path = sidecar_path if sidecar_path.exists() else None + + bval_path = replace_path_extension(self.path, 'bval') + self.bval_path = bval_path if bval_path.exists() else None + + bvec_path = replace_path_extension(self.path, 'bvec') + self.bvec_path = bvec_path if bvec_path.exists() else None + + suffix_match = 
@dataclass
class BidsTsvParticipant:
    """
    Information about a participant found in a row of the `participants.tsv` file of a BIDS
    dataset.
    """

    id: str
    birth_date: str | None = None
    sex: str | None = None
    age: str | None = None
    site: str | None = None
    cohort: str | None = None
    project: str | None = None


def read_bids_participants_tsv_file(participants_tsv_path: Path) -> dict[str, BidsTsvParticipant]:
    """
    Read the `participants.tsv` file of a BIDS dataset and get the participant rows indexed by
    participant ID. Raise an exception if the `participants.tsv` file is incorrect.
    """

    tsv_participants: dict[str, BidsTsvParticipant] = {}
    with open(participants_tsv_path) as participants_tsv_file:
        # Pass the file object directly to `DictReader`, there is no need to materialize all the
        # lines in memory with `readlines` first.
        reader = csv.DictReader(participants_tsv_file, delimiter='\t')
        if reader.fieldnames is None or 'participant_id' not in reader.fieldnames:
            raise Exception(f"Missing 'participant_id' field in participants.tsv file '{participants_tsv_path}'.")

        for tsv_participant_row in reader:
            tsv_participant = read_bids_participants_tsv_row(tsv_participant_row, participants_tsv_path)
            tsv_participants[tsv_participant.id] = tsv_participant

    return tsv_participants


def read_bids_participants_tsv_row(
    tsv_participant_row: dict[str, str],
    participants_tsv_path: Path,
) -> BidsTsvParticipant:
    """
    Read a `participants.tsv` row, or raise an exception if that row is incorrect.
    """

    # Get the participant ID, removing the `sub-` prefix if it is present.
    full_participant_id = tsv_participant_row.get('participant_id')
    if full_participant_id is None:
        raise Exception(f"Missing 'participant_id' value in participants.tsv file '{participants_tsv_path}'.")

    participant_id = re.sub(r'^sub-', '', full_participant_id)

    birth_date = _read_birth_date(tsv_participant_row)
    cohort = _read_cohort(tsv_participant_row)

    # Create the BIDS participant object.
    return BidsTsvParticipant(
        id         = participant_id,
        birth_date = birth_date,
        sex        = tsv_participant_row.get('sex'),
        age        = tsv_participant_row.get('age'),
        site       = tsv_participant_row.get('site'),
        project    = tsv_participant_row.get('project'),
        cohort     = cohort,
    )


def write_bids_participants_tsv_file(tsv_participants: dict[str, BidsTsvParticipant], participants_file_path: Path):
    """
    Write the `participants.tsv` file based from a set of participant rows. Only the participant
    IDs are written, sorted so that the output is deterministic.
    """

    with open(participants_file_path, 'w') as participants_file:
        writer = csv.writer(participants_file, delimiter='\t')
        writer.writerow(['participant_id'])

        for tsv_participant in sorted(tsv_participants.values(), key=lambda tsv_participant: tsv_participant.id):
            writer.writerow([tsv_participant.id])


def merge_bids_tsv_participants(
    tsv_participants: dict[str, BidsTsvParticipant],
    new_tsv_participants: dict[str, BidsTsvParticipant],
):
    """
    Copy a set of participants.tsv rows into another one. The rows of the first set are replaced
    by those of the second if there are duplicates.
    """

    for new_tsv_participant in new_tsv_participants.values():
        tsv_participants[new_tsv_participant.id] = new_tsv_participant


def _read_birth_date(tsv_participant_row: dict[str, str]) -> str | None:
    """
    Read the date of birth field of a participant from a `participants.tsv` row, normalized to
    `YYYY-MM-DD`, or `None` if no parsable date of birth field is present.
    """

    for birth_date_field_name in ['date_of_birth', 'birth_date', 'dob']:
        if birth_date_field_name in tsv_participant_row:
            try:
                return parse(tsv_participant_row[birth_date_field_name]).strftime('%Y-%m-%d')
            except ParserError:
                pass

    return None


def _read_cohort(tsv_participant_row: dict[str, str]) -> str | None:
    """
    Read the cohort field of a participant from a `participants.tsv` row, or `None` if no cohort
    field is present.
    """

    for cohort_field_name in ['cohort', 'subproject']:
        if cohort_field_name in tsv_participant_row:
            return tsv_participant_row[cohort_field_name]

    return None
+ """ + + file_name = tsv_scan_row.get('filename') + if file_name is None: + raise Exception(f"Missing 'filename' value in scans.tsv file '{scans_tsv_path}'.") + + acquisition_time = _read_acquisition_time(tsv_scan_row) + age_at_scan = _read_age_at_scan(tsv_scan_row) + + return BidsTsvScan( + file_name = file_name, + acquisition_time = acquisition_time, + age_at_scan = age_at_scan, + ) + + +def write_bids_scans_tsv_file(tsv_scans: dict[str, BidsTsvScan], scans_tsv_path: Path): + """ + Write the `scans.tsv` file from a set of scan rows. + """ + + with open(scans_tsv_path, 'w') as scans_tsv_file: + writer = csv.writer(scans_tsv_file, delimiter='\t') + writer.writerow(['filename', 'acq_time', 'age_at_scan']) + + for tsv_scan in sorted(tsv_scans.values(), key=lambda tsv_scan: tsv_scan.file_name): + writer.writerow([ + tsv_scan.file_name, + tsv_scan.acquisition_time, + tsv_scan.age_at_scan + ]) + + +def merge_bids_tsv_scans(tsv_scans: dict[str, BidsTsvScan], new_tsv_scans: dict[str, BidsTsvScan]): + """ + Copy a set of scans.tsv rows into another one. The rows of the first set are replaced by those + of these second if there are duplicates. + """ + + for new_tsv_scan in new_tsv_scans.values(): + tsv_scans[new_tsv_scan.file_name] = new_tsv_scan + + +def _read_acquisition_time(tsv_scan_row: dict[str, str]) -> datetime | None: + """ + Read the acquisition time field of a scan from a `scans.tsv` row. + """ + + for field_name in ['acq_time', 'mri_acq_time', 'eeg_acq_time']: + acquisition_time = tsv_scan_row.get(field_name) + if acquisition_time is None or acquisition_time == 'n/a': + continue + + try: + return parse(acquisition_time) + except ParserError: + pass + + return None + + +def _read_age_at_scan(tsv_scan_row: dict[str, str]) -> str | None: + """ + Read the age at scan field of a scan from a `scans.tsv` row. 
+ """ + + for field_name in ['age', 'age_at_scan', 'age_acq_time']: + age_at_scan = tsv_scan_row.get(field_name) + if age_at_scan is not None: + return age_at_scan.strip() + + return None + + +def add_scan_tsv_file_parameters(scan_tsv: BidsTsvScan, scans_tsv_path: Path, file_parameters: dict[str, Any]): + """ + Add a scans.tsv file and row parameters to a LORIS file parameters dictionary. + """ + + file_parameters['scan_acquisition_time'] = scan_tsv.acquisition_time + file_parameters['age_at_scan'] = scan_tsv.age_at_scan + file_parameters['scans_tsv_file'] = scans_tsv_path + file_parameters['scans_tsv_file_bake2hash'] = compute_file_blake2b_hash(scans_tsv_path) diff --git a/python/lib/imaging_lib/bids/util.py b/python/lib/imaging_lib/bids/util.py new file mode 100644 index 000000000..89b9d5658 --- /dev/null +++ b/python/lib/imaging_lib/bids/util.py @@ -0,0 +1,20 @@ +import re + +from lib.db.queries.imaging_file_type import get_all_imaging_file_types +from lib.env import Env + + +def determine_bids_file_type(env: Env, file_name: str) -> str | None: + """ + Determine the file type of a BIDS file from the database using its name, or return `None` if no + corresponding file type is found. 
+ """ + + imaging_file_types = get_all_imaging_file_types(env.db) + + for imaging_file_type in imaging_file_types: + regex = re.escape(imaging_file_type.type) + r'(\.gz)?$' + if re.search(regex, file_name): + return imaging_file_type.type + + return None diff --git a/python/lib/imaging_lib/file.py b/python/lib/imaging_lib/file.py new file mode 100644 index 000000000..64eb4f9c2 --- /dev/null +++ b/python/lib/imaging_lib/file.py @@ -0,0 +1,45 @@ +import getpass +from datetime import datetime + +from lib.db.models.file import DbFile +from lib.db.models.mri_scan_type import DbMriScanType +from lib.db.models.session import DbSession +from lib.env import Env + + +def register_imaging_file( + env: Env, + file_type: str, + file_rel_path: str, + session: DbSession, + mri_scan_type: DbMriScanType | None, + echo_time: float | None, + echo_number: str | None, + phase_encoding_direction: str | None, +) -> DbFile: + """ + Register an imaging file in the database. + """ + + user = getpass.getuser() + time = datetime.now() + + file = DbFile( + file_type = file_type, + rel_path = file_rel_path, + session_id = session.id, + inserted_by_user_id = user, + insert_time = time, + coordinate_space = 'native', + output_type = 'native', + echo_time = echo_time, + echo_number = echo_number, + phase_encoding_direction = phase_encoding_direction, + source_file_id = None, + scan_type_id = mri_scan_type.id if mri_scan_type is not None else None, + ) + + env.db.add(file) + env.db.commit() + + return file diff --git a/python/lib/imaging_lib/file_parameter.py b/python/lib/imaging_lib/file_parameter.py new file mode 100644 index 000000000..c1e1cb941 --- /dev/null +++ b/python/lib/imaging_lib/file_parameter.py @@ -0,0 +1,81 @@ +from datetime import datetime +from typing import Any + +from lib.db.models.file import DbFile +from lib.db.models.file_parameter import DbFileParameter +from lib.db.models.parameter_type import DbParameterType +from lib.db.models.parameter_type_category_rel import 
def get_or_create_parameter_type(env: Env, parameter_name: str) -> DbParameterType:
    """
    Get the parameter type with the given name, creating it (and linking it to the 'MRI Variables'
    parameter type category) if it does not exist yet.
    """

    existing_parameter_type = try_get_parameter_type_with_name(env.db, parameter_name)
    if existing_parameter_type is not None:
        return existing_parameter_type

    new_parameter_type = DbParameterType(
        name        = parameter_name,
        alias       = None,
        data_type   = 'text',
        description = f'{parameter_name} created by the lib.imaging.file_parameter Python module',
        source_from = 'parameter_file',
        queryable   = False,
    )

    # Commit first so that the new parameter type gets its database ID.
    env.db.add(new_parameter_type)
    env.db.commit()

    category = get_parameter_type_category_with_name(env.db, 'MRI Variables')
    env.db.add(DbParameterTypeCategoryRel(
        parameter_type_id          = new_parameter_type.id,
        parameter_type_category_id = category.id,
    ))
    env.db.commit()

    return new_parameter_type
+ Read a NIfTI image and add some of its properties to the file parameters. """ img = nib.load(nifti_path) # type: ignore @@ -30,12 +29,5 @@ def add_nifti_spatial_file_parameters(nifti_path: str, file_parameters: dict[str else: file_parameters['time'] = None - -def find_dir_nifti_names(dir_path: str) -> Iterator[str]: - """ - Iterate over the names of the NIfTI files found in a directory. - """ - - for file_name in os.listdir(dir_path): - if file_name.endswith(('.nii', '.nii.gz')): - yield file_name + # Add the file BLAKE2b hash. + file_parameters['file_blake2b_hash'] = nifti_file_hash diff --git a/python/lib/imaging_lib/nifti_pic.py b/python/lib/imaging_lib/nifti_pic.py new file mode 100644 index 000000000..1e702b839 --- /dev/null +++ b/python/lib/imaging_lib/nifti_pic.py @@ -0,0 +1,67 @@ +import os +import re + +import nibabel as nib +import numpy as np +from nibabel.nifti1 import Nifti1Image +from nilearn import plotting + +from lib.config import get_data_dir_path_config +from lib.db.models.file import DbFile +from lib.env import Env + + +def create_imaging_pic(env: Env, file: DbFile, is_4d_data: bool) -> str: + """ + Creates the preview pic that will show in the imaging browser view session + page. This pic will be stored in the data_dir/pic folder + + :param file_info: dictionary with file information (path, file_id, cand_id...) + :type file_info: dict + :param pic_rel_path: relative path to the pic to use if one provided. 
Otherwise + create_imaging_pic will automatically generate the pic name + based on the file path of the NIfTI file + :type pic_rel_path: str + + :return: path to the created pic + :rtype: str + """ + + data_dir_path = get_data_dir_path_config(env) + + cand_id = file.session.candidate.cand_id + file_path = os.path.join(data_dir_path, file.rel_path) + + pic_name = re.sub(r"\.nii(\.gz)?$", f'_{file.id}_check.png', os.path.basename(file.rel_path)) + pic_rel_path = os.path.join(str(cand_id), pic_name) + pic_dir_path = os.path.join(data_dir_path, 'pic', str(cand_id)) + pic_path = os.path.join(data_dir_path, 'pic', pic_rel_path) + + # create the candID directory where the pic will go if it does not already exist + if not os.path.exists(pic_dir_path): + os.mkdir(pic_dir_path) + + img = nib.load(file_path) # type: ignore + + if is_4d_data: + # Only load the first slice of a 4D image. + data = img.dataobj[..., 0] # type: ignore + else: + data = img.dataobj[...] # type: ignore + + # Load the image as float32 for plotting. 
+ volume = Nifti1Image( + data.astype(np.float32, copy=False), # type: ignore + img.affine, # type: ignore + ) + + plotting.plot_anat( # type: ignore + anat_img=volume, + output_file=pic_path, + display_mode='ortho', + black_bg=True, # type: ignore + draw_cross=False, + annotate=False, + ) + + return pic_rel_path diff --git a/python/lib/import_bids_dataset/args.py b/python/lib/import_bids_dataset/args.py new file mode 100644 index 000000000..393c8f1f9 --- /dev/null +++ b/python/lib/import_bids_dataset/args.py @@ -0,0 +1,13 @@ +from dataclasses import dataclass +from typing import Literal + + +@dataclass +class Args: + source_bids_path: str + type: Literal[None, 'raw', 'derivative'] + bids_validation: bool + create_candidate: bool + create_session: bool + copy: bool + verbose: bool diff --git a/python/lib/import_bids_dataset/check_subjects_sessions.py b/python/lib/import_bids_dataset/check_subjects_sessions.py new file mode 100644 index 000000000..b56798971 --- /dev/null +++ b/python/lib/import_bids_dataset/check_subjects_sessions.py @@ -0,0 +1,428 @@ +import random +from datetime import datetime + +from dateutil.parser import ParserError, parse +from sqlalchemy.orm import Session as Database + +from lib.config import get_default_bids_visit_label_config +from lib.db.models.candidate import DbCandidate +from lib.db.models.cohort import DbCohort +from lib.db.models.project import DbProject +from lib.db.models.session import DbSession +from lib.db.models.site import DbSite +from lib.db.queries.candidate import try_get_candidate_with_cand_id, try_get_candidate_with_psc_id +from lib.db.queries.cohort import try_get_cohort_with_name +from lib.db.queries.project import try_get_project_with_alias, try_get_project_with_name +from lib.db.queries.session import try_get_session_with_cand_id_visit_label +from lib.db.queries.sex import try_get_sex_with_name +from lib.db.queries.site import try_get_site_with_alias, try_get_site_with_name +from lib.db.queries.visit import 
try_get_visit_with_visit_label +from lib.env import Env +from lib.imaging_lib.bids.dataset import BIDSDataset, BIDSSubject +from lib.imaging_lib.bids.tsv_participants import BidsTsvParticipant +from lib.logging import log, log_error, log_error_exit + + +class CheckBidsSubjectSessionError(Exception): + """ + Exception raised if the check or creation of a candidate or session from a BIDS dataset fails. + """ + + def __init__(self, message: str): + super().__init__(message) + + +def check_bids_session_labels( + env: Env, + bids: BIDSDataset, +): + """ + Check that all the session labels in a BIDS dataset correspond to a LORIS visit, or exit the + program with an error if that is not the case. + """ + + unknown_session_labels: list[str] = [] + + for session_label in bids.session_labels: + visit = try_get_visit_with_visit_label(env.db, session_label) + if visit is None: + unknown_session_labels.append(session_label) + + if unknown_session_labels != []: + log_error_exit( + env, + ( + f"Found {len(unknown_session_labels)} unknown session labels in the BIDS dataset. Unknown session" + f" labels are: {', '.join(unknown_session_labels)}. Each BIDS session label should correspond to a" + " LORIS visit label." + ) + ) + + +def check_or_create_bids_subjects_and_sessions( + env: Env, + bids: BIDSDataset, + create_candidate: bool, + create_session: bool, +) -> int: + """ + Check that the subjects and sessions of a BIDS dataset correspond to LORIS candidates and + sessions, or create them using information extracted from the BIDS dataset if the relevant + arguments are passed. + + Exit the program with an error if the check or creation of any candidate or session fails. + Return the project ID of the last candidate processed. + """ + + try: + # Read the participants.tsv property to raise an exception if the file is incorrect. + bids.tsv_participants + except Exception as exception: + log_error_exit(env, f"Error while reading the participants.tsv file. 
Full error:\n{exception}") + + candidate = None + errors: list[Exception] = [] + + for subject in bids.subjects: + try: + candidate = check_or_create_bids_subject_and_sessions(env, subject, create_candidate, create_session) + except Exception as error: + log_error(env, str(error)) + errors.append(error) + + if errors != []: + error_message = f"Found {len(errors)} errors while checking BIDS subjects and sessions." + if create_candidate or create_session: + error_message += " No candidate or session has been created." + + log_error_exit(env, error_message) + + if candidate is None: + log_error_exit(env, "No subject found in the BIDS dataset.") + + # Only commit the new candidates and sessions if no error has occured. + env.db.commit() + + # Return the project ID of a candidate of the BIDS dataset. For this value to be used, it + # should be assumed that all the candidates of the BIDS dataset are in the same project. + return candidate.registration_project_id + + +def check_or_create_bids_subject_and_sessions( + env: Env, + subject: BIDSSubject, + create_candidate: bool, + create_session: bool, +) -> DbCandidate: + """ + Check that a BIDS subject and its sessions correspond to a LORIS candidate and its sessions, or + create them using information extracted from the BIDS dataset if the relevant arguments are + passed. + + Raise an error if the check or creation of the candidate or any of its sessions fail. Return + the candidate corresponding to the BIDS subject. + """ + + tsv_participant = subject.root_dataset.get_tsv_participant(subject.label) + if tsv_participant is None: + raise CheckBidsSubjectSessionError( + f"No participants.tsv entry found for subject label '{subject.label}' in the BIDS dataset. The BIDS" + " directory subjects do not match the participants.tsv file." 
+ ) + + candidate = check_or_create_bids_subject(env, tsv_participant, create_candidate) + + if create_session: + cohort = get_tsv_participant_cohort(env, tsv_participant) + else: + cohort = None + + for session in subject.sessions: + if session.label is not None: + visit_label = session.label + else: + visit_label = get_default_bids_visit_label_config(env) + + check_or_create_bids_session(env, candidate, cohort, visit_label, create_session) + + return candidate + + +def check_or_create_bids_subject(env: Env, tsv_participant: BidsTsvParticipant, create_candidate: bool) -> DbCandidate: + """ + Check that the subject of a BIDS participants.tsv row exists in LORIS, or create them using the + information of that row if the relevant argument is passed. Raise an exception if the candidate + does not exist or cannot be created. + """ + + try: + cand_id = int(tsv_participant.id) + candidate = try_get_candidate_with_cand_id(env.db, cand_id) + if candidate is None: + raise CheckBidsSubjectSessionError( + f"No LORIS candidate found for the BIDS participant ID '{tsv_participant.id}' (identified as a CandID)." + ) + + return candidate + except ValueError: + pass + + candidate = try_get_candidate_with_psc_id(env.db, tsv_participant.id) + if candidate is not None: + return candidate + + if not create_candidate: + raise CheckBidsSubjectSessionError( + f"No LORIS candidate found for the BIDS participant ID '{tsv_participant.id}' (identified as a PSCID)." + ) + + return create_bids_candidate(env, tsv_participant) + + +def create_bids_candidate(env: Env, tsv_participant: BidsTsvParticipant) -> DbCandidate: + """ + Check a candidate using the information of a BIDS participants.tsv row, or raise an exception + if that candidate cannot be created. 
+ """ + + log(env, f"Creating LORIS candidate for BIDS subject '{tsv_participant.id}'...") + + psc_id = tsv_participant.id + + cand_id = generate_new_cand_id(env.db) + + birth_date = get_tsv_participant_birth_date(tsv_participant) + + sex = get_tsv_participant_sex(env, tsv_participant) + + site = get_tsv_participant_site(env, tsv_participant) + + project = get_tsv_participant_project(env, tsv_participant) + + log( + env, + ( + "Creating candidate with information:\n" + f" PSCID = {psc_id}\n" + f" CandID = {cand_id}\n" + f" Site = {site.name}\n" + f" Project = {project.name}" + ) + ) + + now = datetime.now() + + candidate = DbCandidate( + cand_id = cand_id, + psc_id = psc_id, + date_of_birth = birth_date, + sex = sex, + registration_site_id = site.id, + registration_project_id = project.id, + user_id = 'imaging.py', + entity_type = 'Human', + date_active = now, + date_registered = now, + active = True, + ) + + env.db.add(candidate) + env.db.flush() + + return candidate + + +def check_or_create_bids_session( + env: Env, + candidate: DbCandidate, + cohort: DbCohort | None, + visit_label: str, + create_session: bool, +) -> DbSession: + """ + Check that a BIDS session exists in LORIS, or create it using information previously obtained + from the BIDS dataset if the relevant argument is passed. Raise an exception if the session + does not exist or cannot be created. + """ + + session = try_get_session_with_cand_id_visit_label(env.db, candidate.cand_id, visit_label) + if session is not None: + return session + + if not create_session: + log_error_exit( + env, + f"No session found for candidate '{candidate.psc_id}' and visit label '{visit_label}'." 
+ ) + + return create_bids_session(env, candidate, cohort, visit_label) + + +def create_bids_session(env: Env, candidate: DbCandidate, cohort: DbCohort | None, visit_label: str) -> DbSession: + """ + Create a session using information previously obtained from the BIDS dataset, or raise an + exception if the session does not exist or cannot be created. + """ + + if cohort is None: + log_error_exit(env, f"No cohort found for candidate '{candidate.psc_id}', cannot create session.") + + log( + env, + ( + "Creating session with:\n" + f" PSCID = {candidate.psc_id}\n" + f" Visit label = {visit_label}" + ) + ) + + session = DbSession( + candidate_id = candidate.id, + visit_label = visit_label, + current_stage = 'Not Started', + site_id = candidate.registration_site_id, + project_id = candidate.registration_project_id, + cohort_id = cohort.id, + scan_done = True, + submitted = False, + active = True, + user_id = '', + hardcopy_request = '-', + mri_qc_status = '', + mri_qc_pending = False, + mri_caveat = True, + ) + + env.db.add(session) + env.db.flush() + + return session + + +def get_tsv_participant_birth_date(tsv_participant: BidsTsvParticipant) -> datetime | None: + """ + Get the birth date of a BIDS participants.tsv row, or return `None` if no birth date is + specified. Raise an exception if a birth date is specified but cannot be parsed. + """ + + if tsv_participant.birth_date is None: + return None + + try: + return parse(tsv_participant.birth_date) + except ParserError: + raise CheckBidsSubjectSessionError( + f"Could not parse the BIDS participants.tsv birth date '{tsv_participant.birth_date}'." + ) + + +def get_tsv_participant_sex(env: Env, tsv_participant: BidsTsvParticipant) -> str | None: + """ + Get the sex of a BIDS participants.tsv row, or return `None` if no sex is specified. Raise an + exception if a sex is specified but does not exist in LORIS. 
+ """ + + if tsv_participant.sex is None: + return None + + tsv_participant_sex = tsv_participant.sex.lower() + + if tsv_participant_sex in ['m', 'male']: + sex_name = 'Male' + elif tsv_participant_sex in ['f', 'female']: + sex_name = 'Female' + elif tsv_participant_sex in ['o', 'other']: + sex_name = 'Other' + else: + sex_name = tsv_participant.sex + + sex = try_get_sex_with_name(env.db, sex_name) + if sex is None: + raise CheckBidsSubjectSessionError( + f"No LORIS sex found for the BIDS participants.tsv sex name or alias '{tsv_participant.sex}'." + ) + + return sex.name + + +def get_tsv_participant_site(env: Env, tsv_participant: BidsTsvParticipant) -> DbSite: + """ + Get the site of a BIDS participants.tsv row, or raise an exception if no site is specified or + the site does not exist in LORIS. + """ + + if tsv_participant.site is None: + raise CheckBidsSubjectSessionError( + "No 'site' column found in the BIDS participants.tsv file, this field is required to create candidates or" + " sessions. " + ) + + site = try_get_site_with_name(env.db, tsv_participant.site) + if site is not None: + return site + + site = try_get_site_with_alias(env.db, tsv_participant.site) + if site is not None: + return site + + raise CheckBidsSubjectSessionError( + f"No site found for the BIDS participants.tsv site name or alias '{tsv_participant.site}'." + ) + + +def get_tsv_participant_project(env: Env, tsv_participant: BidsTsvParticipant) -> DbProject: + """ + Get the project of a BIDS participants.tsv row, or raise an exception if no project is + specified or the project does not exist in LORIS. + """ + + if tsv_participant.project is None: + raise CheckBidsSubjectSessionError( + "No 'project' column found in the BIDS participants.tsv file, this field is required to create candidates" + " or sessions. 
" + ) + + project = try_get_project_with_name(env.db, tsv_participant.project) + if project is not None: + return project + + project = try_get_project_with_alias(env.db, tsv_participant.project) + if project is not None: + return project + + raise CheckBidsSubjectSessionError( + f"No project found for the BIDS participants.tsv project name or alias '{tsv_participant.project}'." + ) + + +def get_tsv_participant_cohort(env: Env, tsv_participant: BidsTsvParticipant) -> DbCohort: + """ + Get the cohort of a BIDS participants.tsv row, or raise an exception if no cohort is specified + or the cohort does not exist in LORIS. + """ + + if tsv_participant.cohort is None: + raise CheckBidsSubjectSessionError( + "No 'cohort' column found in the BIDS participants.tsv file, this field is required to create session." + ) + + cohort = try_get_cohort_with_name(env.db, tsv_participant.cohort) + if cohort is None: + raise CheckBidsSubjectSessionError( + f"No cohort found for the BIDS participants.tsv cohort name '{tsv_participant.cohort}'." + ) + + return cohort + + +# TODO: Move this function to a more appropriate place. +def generate_new_cand_id(db: Database) -> int: + """ + Generate a new random CandID that is not already in the database. + """ + + while True: + cand_id = random.randint(100000, 999999) + candidate = try_get_candidate_with_cand_id(db, cand_id) + if candidate is None: + return cand_id diff --git a/python/lib/import_bids_dataset/env.py b/python/lib/import_bids_dataset/env.py new file mode 100644 index 000000000..85b98df8e --- /dev/null +++ b/python/lib/import_bids_dataset/env.py @@ -0,0 +1,30 @@ +from dataclasses import dataclass +from pathlib import Path + + +@dataclass +class BIDSImportEnv: + """ + Pipeline-specific variables of the BIDS dataset import pipeline. 
+ """ + + data_dir_path : Path + loris_bids_path : Path | None + total_files_count : int + imported_files_count : int + ignored_files_count : int + failed_files_count : int + unknown_scan_types : list[str] + + def __init__(self, data_dir_path: Path, loris_bids_path: Path | None, total_files_count: int): + self.data_dir_path = data_dir_path + self.loris_bids_path = loris_bids_path + self.total_files_count = total_files_count + self.imported_files_count = 0 + self.ignored_files_count = 0 + self.failed_files_count = 0 + self.unknown_scan_types = [] + + @property + def processed_files_count(self) -> int: + return self.imported_files_count + self.ignored_files_count + self.failed_files_count diff --git a/python/lib/import_bids_dataset/events.py b/python/lib/import_bids_dataset/events.py new file mode 100644 index 000000000..b705683b9 --- /dev/null +++ b/python/lib/import_bids_dataset/events.py @@ -0,0 +1,70 @@ +import json +import os +from pathlib import Path +from typing import Any + +import lib.utilities +from lib.database import Database +from lib.env import Env +from lib.imaging_lib.bids.dataset import BIDSDataset +from lib.import_bids_dataset.args import Args +from lib.logging import log_warning +from lib.physiological import Physiological +from lib.util.crypto import compute_file_blake2b_hash + + +def get_events_metadata( + env: Env, + args: Args, + bids: BIDSDataset, + legacy_db: Database, + loris_bids_path: Path | None, + project_id: int, +) -> dict[Any, Any]: + """ + Get the root level 'events.json' data, assuming a singe project for the BIDS dataset. + """ + + root_event_metadata_file = bids.layout.get_nearest( # type: ignore + bids.path, + return_type='tuple', + strict=False, + extension='json', + suffix='events', + all_=False, + subject=None, + session=None, + ) + + if not root_event_metadata_file: + log_warning(env, "No event metadata files (events.json) in the BIDS root directory.") + return {} + + # Copy the event file to the LORIS BIDS import directory. 
+ + copy_file = str.replace(root_event_metadata_file.path, bids.layout.root, '') # type: ignore + + if loris_bids_path is not None: + event_metadata_path = os.path.join(loris_bids_path, copy_file) + lib.utilities.copy_file(root_event_metadata_file.path, event_metadata_path, args.verbose) # type: ignore + + hed_query = 'SELECT * FROM hed_schema_nodes WHERE 1' + hed_union = legacy_db.pselect(query=hed_query, args=()) # type: ignore + + # load json data + with open(root_event_metadata_file.path) as metadata_file: # type: ignore + event_metadata = json.load(metadata_file) + + blake2 = compute_file_blake2b_hash(root_event_metadata_file.path) # type: ignore + physio = Physiological(legacy_db, args.verbose) + _, dataset_tag_dict = physio.insert_event_metadata( # type: ignore + event_metadata=event_metadata, + event_metadata_file=event_metadata_path, # type: ignore + physiological_file_id=None, + project_id=project_id, + blake2=blake2, + project_wide=True, + hed_union=hed_union # type: ignore + ) + + return dataset_tag_dict # type: ignore diff --git a/python/lib/import_bids_dataset/imaging.py b/python/lib/import_bids_dataset/imaging.py new file mode 100644 index 000000000..9390aa18b --- /dev/null +++ b/python/lib/import_bids_dataset/imaging.py @@ -0,0 +1,48 @@ +from typing import Any + +from lib.db.queries.parameter_type import get_all_parameter_types +from lib.env import Env + + +def map_bids_param_to_loris_param(env: Env, file_parameters: dict[str, Any]): + """ + Maps the BIDS parameters found in the BIDS JSON file with the + parameter type names of LORIS. + + :param file_parameters: dictionary with the list of parameters + found in the BIDS JSON file + :type file_parameters: dict + + :return: returns a dictionary with the BIDS JSON parameter names + as well as their LORIS equivalent + :rtype: dict + """ + + parameter_types_mapping = get_bids_to_minc_parameter_types_mapping(env) + + # Map BIDS parameters with the LORIS ones. 
+ for file_parameter in list(file_parameters.keys()): + file_parameter_type = parameter_types_mapping.get(file_parameter) + if file_parameter_type is not None: + file_parameters[file_parameter_type] = file_parameters[file_parameter] + + +def get_bids_to_minc_parameter_types_mapping(env: Env) -> dict[str, str]: + """ + Queries the BIDS to MINC mapping dictionary stored in the paramater_type table and returns a + dictionary with the BIDS term as keys and the MINC terms as values. + + :return: BIDS to MINC mapping dictionary + :rtype: dict + """ + + parameter_types = get_all_parameter_types(env.db) + + parameter_types_mapping: dict[str, str] = {} + for parameter_type in parameter_types: + if parameter_type.alias is None: + continue + + parameter_types_mapping[parameter_type.alias] = parameter_type.name + + return parameter_types_mapping diff --git a/python/lib/import_bids_dataset/main.py b/python/lib/import_bids_dataset/main.py new file mode 100644 index 000000000..d887b618e --- /dev/null +++ b/python/lib/import_bids_dataset/main.py @@ -0,0 +1,319 @@ +import os +import re +import shutil +from pathlib import Path +from typing import Any + +from lib.config import get_data_dir_path_config, get_default_bids_visit_label_config +from lib.database import Database +from lib.db.models.session import DbSession +from lib.db.queries.candidate import try_get_candidate_with_psc_id +from lib.db.queries.session import try_get_session_with_cand_id_visit_label +from lib.eeg import Eeg +from lib.env import Env +from lib.imaging_lib.bids.dataset import BIDSDataset, BIDSDataType, BIDSSession +from lib.imaging_lib.bids.dataset_description import BidsDatasetDescriptionError +from lib.imaging_lib.bids.eeg.dataset import BIDSEEGDataType +from lib.imaging_lib.bids.mri.dataset import BIDSMRIDataType +from lib.imaging_lib.bids.tsv_participants import ( + BidsTsvParticipant, + merge_bids_tsv_participants, + read_bids_participants_tsv_file, + write_bids_participants_tsv_file, +) +from 
lib.imaging_lib.bids.tsv_scans import ( + BidsTsvScan, + merge_bids_tsv_scans, + read_bids_scans_tsv_file, + write_bids_scans_tsv_file, +) +from lib.import_bids_dataset.args import Args +from lib.import_bids_dataset.check_subjects_sessions import ( + check_bids_session_labels, + check_or_create_bids_subjects_and_sessions, +) +from lib.import_bids_dataset.env import BIDSImportEnv +from lib.import_bids_dataset.events import get_events_metadata +from lib.import_bids_dataset.mri import import_bids_nifti +from lib.import_bids_dataset.print import print_bids_import_summary +from lib.logging import log, log_error, log_error_exit, log_warning +from lib.util.iter import count + +BIDS_EEG_DATA_TYPES = ['eeg', 'ieeg'] + +BIDS_MRI_DATA_TYPES = ['anat', 'dwi', 'fmap', 'func'] + + +def import_bids_dataset(env: Env, args: Args, legacy_db: Database): + """ + Read the provided BIDS dataset and import it into LORIS. + """ + + data_dir_path = Path(get_data_dir_path_config(env)) + + log(env, "Parsing BIDS dataset...") + + bids = BIDSDataset(Path(args.source_bids_path), args.bids_validation) + + niftis_count = count(bids.niftis) + + log(env, f"Found {niftis_count} NIfTI files.") + + log(env, f"Found {len(bids.subject_labels)} subjects:") + for subject_label in bids.subject_labels: + log(env, f"- {subject_label}") + + log(env, f"Found {len(bids.session_labels)} sessions:") + for session_label in bids.session_labels: + log(env, f"- {session_label}") + + # Check the BIDS subject and session labels and create their candidates and sessions in LORIS + # if needed. + + check_bids_session_labels(env, bids) + + project_id = check_or_create_bids_subjects_and_sessions(env, bids, args.create_candidate, args.create_session) + + # Get the LORIS BIDS import directory path and create the directory if needed. + + if args.copy: + loris_bids_path = get_loris_bids_path(env, bids, data_dir_path) + else: + loris_bids_path = None + + # Get the BIDS events metadata. 
+ + events_metadata = get_events_metadata(env, args, bids, legacy_db, loris_bids_path, project_id) + + # Copy the `participants.tsv` file rows. + + if loris_bids_path is not None and bids.tsv_participants is not None: + loris_participants_tsv_path = loris_bids_path / 'participants.tsv' + copy_bids_tsv_participants(bids.tsv_participants, loris_participants_tsv_path) + + # Process each session directory. + + import_env = BIDSImportEnv( + data_dir_path = data_dir_path, + loris_bids_path = loris_bids_path, + total_files_count = niftis_count, + ) + + for bids_session in bids.sessions: + import_bids_session(env, import_env, args, bids_session, events_metadata, legacy_db) + + # Copy the static BIDS files. + + if loris_bids_path is not None: + copy_static_dataset_files(bids.path, loris_bids_path) + + # Print import summary. + + print_bids_import_summary(env, import_env) + + +def import_bids_session( + env: Env, + import_env: BIDSImportEnv, + args: Args, + bids_session: BIDSSession, + events_metadata: dict[Any, Any], + legacy_db: Database, +): + """ + Read the provided BIDS session directory and import it into LORIS. + """ + + log(env, f"Importing files for subject '{bids_session.subject.label}' and session '{bids_session.label}'.") + + candidate = try_get_candidate_with_psc_id(env.db, bids_session.subject.label) + if candidate is None: + # This should not happen as BIDS subject labels should have been checked previously. + log_error_exit(env, f"Candidate not found for PSCID '{bids_session.subject.label}'.") + + if bids_session.label is not None: + visit_label = bids_session.label + else: + visit_label = get_default_bids_visit_label_config(env) + + session = try_get_session_with_cand_id_visit_label(env.db, candidate.cand_id, visit_label) + if session is None: + # This should not happen as BIDS session labels should have been checked previously. 
+ log_error_exit(env, f"Visit not found for visit label '{visit_label}'.") + + try: + # Read the scans.tsv property to raise an exception if the file is incorrect. + tsv_scans = bids_session.tsv_scans + + if import_env.loris_bids_path is not None and tsv_scans is not None: + loris_scans_tsv_path = ( + import_env.loris_bids_path + / f'sub-{bids_session.subject.label}' + / f'ses-{bids_session.label}' + / f'sub-{bids_session.subject.label}_ses-{bids_session.label}_scans.tsv' + ) + + copy_bids_tsv_scans(tsv_scans, loris_scans_tsv_path) + except Exception as exception: + log_warning( + env, + f"Error while reading the session scans.tsv file, scans.tsv data will be ignored. Full error:\n{exception}" + ) + + # Process each data type directory. + + for data_type in bids_session.data_types: + import_bids_data_type_files(env, import_env, args, session, data_type, events_metadata, legacy_db) + + +def import_bids_data_type_files( + env: Env, + import_env: BIDSImportEnv, + args: Args, + session: DbSession, + data_type: BIDSDataType, + events_metadata: dict[Any, Any], + legacy_db: Database, +): + """ + Read the provided BIDS data type directory and import it into LORIS. + """ + + match data_type: + case BIDSMRIDataType(): + import_bids_mri_data_type_files(env, import_env, args, session, data_type) + case BIDSEEGDataType(): + import_bids_eeg_data_type_files(env, import_env, args, session, data_type, events_metadata, legacy_db) + case _: + log_warning(env, f"Unknown data type '{data_type.name}'. Skipping.") + + +def import_bids_mri_data_type_files( + env: Env, + import_env: BIDSImportEnv, + args: Args, + session: DbSession, + data_type: BIDSMRIDataType, +): + """ + Read the BIDS MRI data type directory and import its files into LORIS. 
+ """ + + if args.type == 'derivative': + log_error_exit(env, "Derivative data is not support for BIDS MRI import yet.") + + if not args.copy: + log_error_exit(env, "No copy import is not support for BIDS MRI import yet.") + + for nifti in data_type.niftis: + try: + import_bids_nifti(env, import_env, session, nifti) + except Exception as exception: + import_env.failed_files_count += 1 + log_error( + env, + ( + f"Error while importing MRI file '{nifti.name}'. Error message:\n" + f"{exception}\n" + "Skipping." + ) + ) + + +def import_bids_eeg_data_type_files( + env: Env, + import_env: BIDSImportEnv, + args: Args, + session: DbSession, + data_type: BIDSEEGDataType, + events_metadata: dict[Any, Any], + legacy_db: Database, +): + """ + Read the provided BIDS EEG data type directory and import it into LORIS. + """ + + loris_data_type_dir_rel_path = os.path.join( + f'sub-{session.candidate.psc_id}', + f'ses-{session.visit_label}', + data_type.name, + ) + + Eeg( + data_type = data_type, + db = legacy_db, + verbose = env.verbose, + data_dir = str(import_env.data_dir_path), + session = session, + loris_bids_eeg_rel_dir = loris_data_type_dir_rel_path, + loris_bids_root_dir = str(import_env.loris_bids_path), + dataset_tag_dict = events_metadata, + dataset_type = args.type, + ) + + +def copy_bids_tsv_participants(tsv_participants: dict[str, BidsTsvParticipant], loris_participants_tsv_path: Path): + """ + Copy some participants.tsv rows into the LORIS participants.tsv file, creating it if necessary. + """ + + if loris_participants_tsv_path.exists(): + loris_tsv_participants = read_bids_participants_tsv_file(loris_participants_tsv_path) + merge_bids_tsv_participants(tsv_participants, loris_tsv_participants) + + write_bids_participants_tsv_file(tsv_participants, loris_participants_tsv_path) + + +def copy_bids_tsv_scans(tsv_scans: dict[str, BidsTsvScan], loris_scans_tsv_path: Path): + """ + Copy some scans.tsv rows into a LORIS scans.tsv file, creating it if necessary. 
+ """ + + if loris_scans_tsv_path.exists(): + loris_tsv_scans = read_bids_scans_tsv_file(loris_scans_tsv_path) + merge_bids_tsv_scans(tsv_scans, loris_tsv_scans) + + write_bids_scans_tsv_file(tsv_scans, loris_scans_tsv_path) + + +def copy_static_dataset_files(source_bids_path: Path, loris_bids_path: Path): + """ + Copy the static files of the source BIDS dataset to the LORIS BIDS dataset. + """ + + for file_name in ['README', 'dataset_description.json']: + source_file_path = os.path.join(source_bids_path, file_name) + if not os.path.isfile(source_file_path): + continue + + loris_file_path = os.path.join(loris_bids_path, file_name) + shutil.copyfile(source_file_path, loris_file_path) + + +def get_loris_bids_path(env: Env, bids: BIDSDataset, data_dir_path: Path) -> Path: + """ + Get the LORIS BIDS directory path for the BIDS dataset to import, and create that directory if + it does not exist yet. + """ + + try: + dataset_description = bids.get_dataset_description() + except BidsDatasetDescriptionError as error: + log_error_exit(env, str(error)) + + if dataset_description is None: + log_error_exit( + env, + "No file 'dataset_description.json' found in the input BIDS dataset.", + ) + + # Sanitize the dataset metadata to have a usable name for the directory. 
+ dataset_name = re.sub(r'[^0-9a-zA-Z]+', '_', dataset_description.name) + dataset_version = re.sub(r'[^0-9a-zA-Z\.]+', '_', dataset_description.bids_version) + + loris_bids_path = data_dir_path / 'bids_imports' / f'{dataset_name}_BIDSVersion_{dataset_version}' + + if not loris_bids_path.exists(): + loris_bids_path.mkdir() + + return loris_bids_path diff --git a/python/lib/import_bids_dataset/mri.py b/python/lib/import_bids_dataset/mri.py new file mode 100644 index 000000000..1d45568c1 --- /dev/null +++ b/python/lib/import_bids_dataset/mri.py @@ -0,0 +1,227 @@ +import shutil +from pathlib import Path +from typing import Any, cast + +from lib.db.models.mri_scan_type import DbMriScanType +from lib.db.models.session import DbSession +from lib.db.queries.file import try_get_file_with_hash, try_get_file_with_rel_path +from lib.db.queries.mri_scan_type import try_get_mri_scan_type_with_name +from lib.env import Env +from lib.imaging_lib.bids.json import add_bids_json_file_parameters +from lib.imaging_lib.bids.mri.dataset import BIDSMRIAcquisition +from lib.imaging_lib.bids.tsv_scans import add_scan_tsv_file_parameters +from lib.imaging_lib.bids.util import determine_bids_file_type +from lib.imaging_lib.file import register_imaging_file +from lib.imaging_lib.file_parameter import register_file_parameter, register_file_parameters +from lib.imaging_lib.mri_scan_type import create_mri_scan_type +from lib.imaging_lib.nifti import add_nifti_file_parameters +from lib.imaging_lib.nifti_pic import create_imaging_pic +from lib.import_bids_dataset.env import BIDSImportEnv +from lib.logging import log, log_warning +from lib.util.crypto import compute_file_blake2b_hash +from lib.util.fs import get_path_extension + +KNOWN_SUFFIXES_PER_MRI_DATA_TYPE = { + 'anat': [ + 'T1w', 'T2w', 'T1rho', 'T1map', 'T2map', 'T2star', 'FLAIR', 'FLASH', 'PD', 'PDmap', 'PDT2', + 'inplaneT1', 'inplaneT2', 'angio', + ], + 'func': [ + 'bold', 'cbv', 'phase', + ], + 'dwi': [ + 'dwi', 'sbref', + ], + 'fmap': [ 
+ 'phasediff', 'magnitude1', 'magnitude2', 'phase1', 'phase2', 'fieldmap', 'epi', + ], +} + + +def import_bids_nifti(env: Env, import_env: BIDSImportEnv, session: DbSession, acquisition: BIDSMRIAcquisition): + """ + Import a BIDS NIfTI file and its associated files in LORIS. + """ + + log( + env, + ( + f"Importing MRI acquisition '{acquisition.name}'... ({import_env.processed_files_count + 1}" + f" / {import_env.total_files_count})" + ), + ) + + # Get the relevant `scans.tsv` row if there is one. + + tsv_scan = acquisition.session.get_tsv_scan(acquisition.nifti_path.name) + if tsv_scan is None: + log_warning( + env, + f"No scans.tsv row found for acquisition '{acquisition.name}', scans.tsv data will be ignored.", + ) + + # Get the path at which to copy the file. + + loris_file_dir_path = ( + # The LORIS BIDS path should not be `None` since `--no-copy` is not supported for MRI acquisitions yet. + cast(Path, import_env.loris_bids_path) + / f'sub-{session.candidate.psc_id}' + / f'ses-{session.visit_label}' + / acquisition.data_type.name + ) + + loris_file_path = loris_file_dir_path / acquisition.nifti_path.name + + loris_file_rel_path = loris_file_path.relative_to(import_env.data_dir_path) + + # Check whether the file is already registered in LORIS. + + loris_file = try_get_file_with_rel_path(env.db, str(loris_file_rel_path)) + if loris_file is not None: + import_env.ignored_files_count += 1 + log(env, f"File '{loris_file_rel_path}' is already registered in LORIS. Skipping.") + return + + # Get information about the file. + + file_type = get_check_nifti_imaging_file_type(env, acquisition) + file_hash = get_check_nifti_file_hash(env, acquisition) + mri_scan_type = get_nifti_mri_scan_type(env, import_env, acquisition) + + # Get the auxiliary files. 
+ + aux_file_paths: list[Path] = [] + + if acquisition.bval_path is not None: + aux_file_paths.append(acquisition.bval_path) + + if acquisition.bvec_path is not None: + aux_file_paths.append(acquisition.bvec_path) + + # Get the file parameters. + + file_parameters: dict[str, Any] = {} + + if acquisition.sidecar_path is not None: + json_loris_path = loris_file_dir_path / acquisition.sidecar_path.name + json_loris_rel_path = json_loris_path.relative_to(import_env.data_dir_path) + add_bids_json_file_parameters(env, acquisition.sidecar_path, json_loris_rel_path, file_parameters) + + add_nifti_file_parameters(acquisition.nifti_path, file_hash, file_parameters) + + if acquisition.session.tsv_scans_path is not None and tsv_scan is not None: + add_scan_tsv_file_parameters(tsv_scan, acquisition.session.tsv_scans_path, file_parameters) + + for aux_file_path in aux_file_paths: + aux_file_type = get_path_extension(aux_file_path) + aux_file_hash = compute_file_blake2b_hash(aux_file_path) + aux_file_loris_path = loris_file_dir_path / aux_file_path.name + aux_file_loris_rel_path = aux_file_loris_path.relative_to(import_env.data_dir_path) + file_parameters[f'bids_{aux_file_type}'] = str(aux_file_loris_rel_path) + file_parameters[f'bids_{aux_file_type}_blake2b_hash'] = aux_file_hash + + # Copy the files on the file system. + + copy_bids_file(loris_file_dir_path, acquisition.nifti_path) + + if acquisition.sidecar_path is not None: + copy_bids_file(loris_file_dir_path, acquisition.sidecar_path) + + for aux_file_path in aux_file_paths: + copy_bids_file(loris_file_dir_path, aux_file_path) + + # Register the file and its parameters in the database. 
+
+ echo_time = file_parameters.get('EchoTime')
+ echo_number = file_parameters.get('EchoNumber')
+ phase_encoding_direction = file_parameters.get('PhaseEncodingDirection')
+
+ file = register_imaging_file(
+ env,
+ file_type,
+ str(loris_file_rel_path),
+ session,
+ mri_scan_type,
+ echo_time,
+ echo_number,
+ phase_encoding_direction,
+ )
+
+ register_file_parameters(env, file, file_parameters)
+
+ # Create and register the file picture.
+
+ pic_rel_path = create_imaging_pic(env, file, True if 'time' in file_parameters else False)
+
+ register_file_parameter(env, file, 'check_pic_filename', pic_rel_path)
+
+ import_env.imported_files_count += 1
+
+
+def get_check_nifti_imaging_file_type(env: Env, acquisition: BIDSMRIAcquisition) -> str:
+ """
+ Get the BIDS file type of a NIfTI file and raise an exception if that file type is not
+ registered in the database.
+ """
+
+ file_type = determine_bids_file_type(env, acquisition.nifti_path.name)
+ if file_type is None:
+ raise Exception("No matching file type found in the database.")
+
+ return file_type
+
+
+def get_check_nifti_file_hash(env: Env, acquisition: BIDSMRIAcquisition) -> str:
+ """
+ Compute the BLAKE2b hash of a NIfTI file and raise an exception if that hash is already
+ registered in the database.
+ """
+
+ file_hash = compute_file_blake2b_hash(acquisition.nifti_path)
+
+ file = try_get_file_with_hash(env.db, file_hash)
+ if file is not None:
+ raise Exception(f"File with hash '{file_hash}' already present in the database.")
+
+ return file_hash
+
+
+def get_nifti_mri_scan_type(
+ env: Env,
+ import_env: BIDSImportEnv,
+ acquisition: BIDSMRIAcquisition,
+) -> DbMriScanType | None:
+ """
+ Get the MRI scan type corresponding to a BIDS MRI acquisition using its BIDS suffix. Create the
+ MRI scan type in the database if the suffix is a standard BIDS suffix and the scan type does not
+ already exist in the database, or raise an exception if no known scan type is found.
+ """ + + if acquisition.suffix is None: + raise Exception("No BIDS suffix found in the NIfTI file name, cannot infer the file data type.") + + mri_scan_type = try_get_mri_scan_type_with_name(env.db, acquisition.suffix) + if mri_scan_type is not None: + return mri_scan_type + + if acquisition.suffix not in KNOWN_SUFFIXES_PER_MRI_DATA_TYPE[acquisition.data_type.name]: + if acquisition.suffix not in import_env.unknown_scan_types: + import_env.unknown_scan_types.append(acquisition.suffix) + + raise Exception(f"Found unknown MRI file suffix '{acquisition.suffix}'.") + + return create_mri_scan_type(env, acquisition.suffix) + + +def copy_bids_file(loris_file_dir_path: Path, file_path: Path): + """ + Copy a BIDS file to a directory. + """ + + loris_file_path = loris_file_dir_path / file_path.name + + if loris_file_path.exists(): + raise Exception(f"File '{loris_file_path}' already exists in LORIS.") + + loris_file_dir_path.mkdir(exist_ok=True) + shutil.copyfile(file_path, loris_file_path) diff --git a/python/lib/import_bids_dataset/print.py b/python/lib/import_bids_dataset/print.py new file mode 100644 index 000000000..0782a9522 --- /dev/null +++ b/python/lib/import_bids_dataset/print.py @@ -0,0 +1,30 @@ +from lib.env import Env +from lib.import_bids_dataset.env import BIDSImportEnv +from lib.logging import log + + +def print_bids_import_summary(env: Env, import_env: BIDSImportEnv): + """ + Print a summary of this BIDS import process. + """ + + log( + env, + ( + f"Processed {import_env.processed_files_count} MRI files, including {import_env.imported_files_count}" + f" imported files, {import_env.ignored_files_count} ignored files, and {import_env.failed_files_count}" + " errors." 
+ ),
+ )
+
+ if import_env.unknown_scan_types != []:
+ import_env.unknown_scan_types.sort()
+
+ unknown_scan_types_string = ""
+ for unknown_scan_type in import_env.unknown_scan_types:
+ unknown_scan_types_string += f"\n- {unknown_scan_type}"
+
+ log(
+ env,
+ f"Found {len(import_env.unknown_scan_types)} unknown MRI scan types:{unknown_scan_types_string}"
+ )
diff --git a/python/lib/mri.py b/python/lib/mri.py
deleted file mode 100644
index 03259b5ca..000000000
--- a/python/lib/mri.py
+++ /dev/null
@@ -1,455 +0,0 @@
-"""Deals with MRI BIDS datasets and register them into the database."""
-
-import getpass
-import json
-import os
-import re
-import sys
-
-import lib.exitcode
-import lib.utilities as utilities
-from lib.candidate import Candidate
-from lib.imaging import Imaging
-from lib.scanstsv import ScansTSV
-from lib.session import Session
-from lib.util.crypto import compute_file_blake2b_hash
-
-
-class Mri:
- """
- This class reads the BIDS MRI data structure and registers the MRI datasets into the
- database by calling lib.imaging class.
- - :Example: - - from lib.bidsreader import BidsReader - from lib.mri import Mri - from lib.database import Database - - # database connection - db = Database(config_file.mysql, verbose) - db.connect() - - # grep config settings from the Config module - config_obj = Config(db, verbose) - default_bids_vl = config_obj.get_config('default_bids_vl') - data_dir = config_obj.get_config('dataDirBasepath') - - # load the BIDS directory - bids_reader = BidsReader(bids_dir) - - # create the LORIS_BIDS directory in data_dir based on Name and BIDS version - loris_bids_root_dir = create_loris_bids_directory( - bids_reader, data_dir, verbose - ) - for row in bids_reader.cand_session_modalities_list: - for modality in row['modalities']: - if modality in ['anat', 'dwi', 'fmap', 'func']: - bids_session = row['bids_ses_id'] - visit_label = bids_session if bids_session else default_bids_vl - loris_bids_mri_rel_dir = "sub-" + row['bids_sub_id'] + "/" + \ - "ses-" + visit_label + "/mri/" - lib.utilities.create_dir( - loris_bids_root_dir + loris_bids_mri_rel_dir, verbose - ) - Eeg( - bids_reader = bids_reader, - bids_sub_id = row['bids_sub_id'], - bids_ses_id = row['bids_ses_id'], - bids_modality = modality, - db = db, - verbose = verbose, - data_dir = data_dir, - default_visit_label = default_bids_vl, - loris_bids_eeg_rel_dir = loris_bids_mri_rel_dir, - loris_bids_root_dir = loris_bids_root_dir - ) - - # disconnect from the database - db.disconnect() - """ - - def __init__(self, bids_reader, bids_sub_id, bids_ses_id, bids_modality, db, - verbose, data_dir, default_visit_label, - loris_bids_mri_rel_dir, loris_bids_root_dir): - - # enumerate the different suffixes supported by BIDS per modality type - self.possible_suffix_per_modality = { - 'anat' : [ - 'T1w', 'T2w', 'T1rho', 'T1map', 'T2map', 'T2star', 'FLAIR', - 'FLASH', 'PD', 'PDmap', 'PDT2', 'inplaneT1', 'inplaneT2', 'angio' - ], - 'func' : [ - 'bold', 'cbv', 'phase' - ], - 'dwi' : [ - 'dwi', 'sbref' - ], - 'fmap' : [ - 'phasediff', 
'magnitude1', 'magnitude2', 'phase1', 'phase2', - 'fieldmap', 'epi' - ] - } - - # load bids objects - self.bids_reader = bids_reader - self.bids_layout = bids_reader.bids_layout - - # load the LORIS BIDS import root directory where the files will be copied - self.loris_bids_mri_rel_dir = loris_bids_mri_rel_dir - self.loris_bids_root_dir = loris_bids_root_dir - self.data_dir = data_dir - - # load BIDS subject, visit and modality - self.bids_sub_id = bids_sub_id - self.bids_ses_id = bids_ses_id - self.bids_modality = bids_modality - - # load database handler object and verbose bool - self.db = db - self.verbose = verbose - - # find corresponding CandID and SessionID in LORIS - self.loris_cand_info = self.get_loris_cand_info() - self.default_vl = default_visit_label - self.psc_id = self.loris_cand_info['PSCID'] - self.cand_id = self.loris_cand_info['CandID'] - self.center_id = self.loris_cand_info['RegistrationCenterID'] - self.project_id = self.loris_cand_info['RegistrationProjectID'] - self.cohort_id = None - for row in bids_reader.participants_info: - if not row['participant_id'] == self.psc_id: - continue - if 'cohort' in row: - cohort_info = db.pselect( - "SELECT CohortID FROM cohort WHERE title = %s", - [row['cohort'], ] - ) - if len(cohort_info) > 0: - self.cohort_id = cohort_info[0]['CohortID'] - break - - self.session_id = self.get_loris_session_id() - - # grep all the NIfTI files for the modality - self.nifti_files = self.grep_nifti_files() - - # check if a tsv with acquisition dates or age is available for the subject - self.scans_file = None - if self.bids_layout.get(suffix='scans', subject=self.psc_id, return_type='filename'): - self.scans_file = self.bids_layout.get(suffix='scans', subject=self.psc_id, - return_type='filename', extension='tsv')[0] - - # loop through NIfTI files and register them in the DB - for nifti_file in self.nifti_files: - self.register_raw_file(nifti_file) - - def get_loris_cand_info(self): - """ - Gets the LORIS Candidate info for 
the BIDS subject. - - :return: Candidate info of the subject found in the database - :rtype: list - """ - - candidate = Candidate(verbose=self.verbose, psc_id=self.bids_sub_id) - loris_cand_info = candidate.get_candidate_info_from_loris(self.db) - - return loris_cand_info - - def get_loris_session_id(self): - """ - Greps the LORIS session.ID corresponding to the BIDS visit. Note, - if no BIDS visit are set, will use the default visit label value set - in the config module - - :return: the session's ID in LORIS - :rtype: int - """ - - # check if there are any visit label in BIDS structure, if not, - # will use the default visit label set in the config module - visit_label = self.bids_ses_id if self.bids_ses_id else self.default_vl - - session = Session( - self.db, self.verbose, self.cand_id, visit_label, - self.center_id, self.project_id, self.cohort_id - ) - loris_vl_info = session.get_session_info_from_loris() - - if not loris_vl_info: - message = "ERROR: visit label " + visit_label + "does not exist in " + \ - "the session table for candidate " + self.cand_id + \ - "\nPlease make sure the visit label is created in the " + \ - "database or run bids_import.py with the -s option -s if " + \ - "you wish that the insertion pipeline creates the visit " + \ - "label in the session table." - print(message) - exit(lib.exitcode.SELECT_FAILURE) - - return loris_vl_info['ID'] - - def grep_nifti_files(self): - """ - Returns the list of NIfTI files found for the modality. 
- - :return: list of NIfTI files found for the modality - :rtype: list - """ - - # grep all the possible suffixes for the modality - modality_possible_suffix = self.possible_suffix_per_modality[self.bids_modality] - - # loop through the possible suffixes and grep the NIfTI files - nii_files_list = [] - for suffix in modality_possible_suffix: - nii_files_list.extend(self.grep_bids_files(suffix, 'nii.gz')) - - # return the list of found NIfTI files - return nii_files_list - - def grep_bids_files(self, bids_type, extension): - """ - Greps the BIDS files and their layout information from the BIDSLayout - and return that list. - - :param bids_type: the BIDS type to use to grep files (T1w, T2w, bold, dwi...) - :type bids_type: str - :param extension: extension of the file to look for (nii.gz, json...) - :type extension: str - - :return: list of files from the BIDS layout - :rtype: list - """ - - if self.bids_ses_id: - return self.bids_layout.get( - subject = self.bids_sub_id, - session = self.bids_ses_id, - datatype = self.bids_modality, - extension = extension, - suffix = bids_type - ) - else: - return self.bids_layout.get( - subject = self.bids_sub_id, - datatype = self.bids_modality, - extension = extension, - suffix = bids_type - ) - - def register_raw_file(self, nifti_file): - """ - Registers raw MRI files and related files into the files and parameter_file tables. - - :param nifti_file: NIfTI file object - :type nifti_file: pybids NIfTI file object - """ - - # insert the NIfTI file - self.fetch_and_insert_nifti_file(nifti_file) - - def fetch_and_insert_nifti_file(self, nifti_file, derivatives=None): - """ - Gather NIfTI file information to insert into the files and parameter_file tables. - Once all the information has been gathered, it will call imaging.insert_imaging_file - that will perform the insertion into the files and parameter_file tables. 
- - :param nifti_file : NIfTI file object - :type nifti_file : pybids NIfTI file object - :param derivatives: whether the file to be registered is a derivative file - :type derivatives: bool - - :return: dictionary with the inserted file_id and file_path - :rtype: dict - """ - - # load the Imaging object that will be used to insert the imaging data into the database - imaging = Imaging(self.db, self.verbose) - - # load the list of associated files with the NIfTI file - associated_files = nifti_file.get_associations() - - # load the entity information from the NIfTI file - entities = nifti_file.get_entities() - scan_type = entities['suffix'] - - # loop through the associated files to grep JSON, bval, bvec... - json_file = None - other_assoc_files = {} - for assoc_file in associated_files: - file_info = assoc_file.get_entities() - if re.search(r'json$', file_info['extension']): - json_file = assoc_file.path - elif re.search(r'bvec$', file_info['extension']): - other_assoc_files['bvec_file'] = assoc_file.path - elif re.search(r'bval$', file_info['extension']): - other_assoc_files['bval_file'] = assoc_file.path - elif re.search(r'tsv$', file_info['extension']) and file_info['suffix'] == 'events': - other_assoc_files['task_file'] = assoc_file.path - elif re.search(r'tsv$', file_info['extension']) and file_info['suffix'] == 'physio': - other_assoc_files['physio_file'] = assoc_file.path - - # read the json file if it exists - file_parameters = {} - if json_file: - with open(json_file) as data_file: - file_parameters = json.load(data_file) - file_parameters = imaging.map_bids_param_to_loris_param(file_parameters) - # copy the JSON file to the LORIS BIDS import directory - json_path = self.copy_file_to_loris_bids_dir(json_file) - file_parameters['bids_json_file'] = json_path - json_blake2 = compute_file_blake2b_hash(json_file) - file_parameters['bids_json_file_blake2b_hash'] = json_blake2 - - # grep the file type from the ImagingFileTypes table - file_type = 
imaging.determine_file_type(nifti_file.filename) - if not file_type: - message = "\nERROR: File type for " + nifti_file.filename \ - + " does not exist in ImagingFileTypes database table\n" - print(message) - sys.exit(lib.exitcode.SELECT_FAILURE) - - # determine the output type - output_type = 'derivatives' if derivatives else 'native' - if not derivatives: - coordinate_space = 'native' - - # get the acquisition date of the MRI or the age at the time of acquisition - if self.scans_file: - scan_info = ScansTSV(self.scans_file, nifti_file.filename, self.verbose) - file_parameters['scan_acquisition_time'] = scan_info.get_acquisition_time() - file_parameters['age_at_scan'] = scan_info.get_age_at_scan() - # copy the scans.tsv file to the LORIS BIDS import directory - scans_path = scan_info.copy_scans_tsv_file_to_loris_bids_dir( - self.bids_sub_id, self.loris_bids_root_dir, self.data_dir - ) - file_parameters['scans_tsv_file'] = scans_path - scans_blake2 = compute_file_blake2b_hash(self.scans_file) - file_parameters['scans_tsv_file_bake2hash'] = scans_blake2 - - # grep voxel step from the NIfTI file header - step_parameters = imaging.get_nifti_image_step_parameters(nifti_file.path) - file_parameters['xstep'] = step_parameters[0] - file_parameters['ystep'] = step_parameters[1] - file_parameters['zstep'] = step_parameters[2] - - # grep the time length from the NIfTI file header - is_4d_dataset = False - length_parameters = imaging.get_nifti_image_length_parameters(nifti_file.path) - if len(length_parameters) == 4: - file_parameters['time'] = length_parameters[3] - is_4d_dataset = True - - # add all other associated files to the file_parameters so they get inserted - # in parameter_file - for type in other_assoc_files: - original_file_path = other_assoc_files[type] - copied_path = self.copy_file_to_loris_bids_dir(original_file_path) - file_param_name = 'bids_' + type - file_parameters[file_param_name] = copied_path - file_blake2 = 
compute_file_blake2b_hash(original_file_path) - hash_param_name = file_param_name + '_blake2b_hash' - file_parameters[hash_param_name] = file_blake2 - - # append the blake2b to the MRI file parameters dictionary - blake2 = compute_file_blake2b_hash(nifti_file.path) - file_parameters['file_blake2b_hash'] = blake2 - - # check that the file is not already inserted before inserting it - result = imaging.grep_file_info_from_hash(blake2) - file_id = result['FileID'] if result else None - file_path = result['File'] if result else None - if not file_id: - # grep the scan type ID from the mri_scan_type table (if it is not already in - # the table, it will add a row to the mri_scan_type table) - scan_type_id = self.db.grep_id_from_lookup_table( - id_field_name = 'MriScanTypeID', - table_name = 'mri_scan_type', - where_field_name = 'MriScanTypeName', - where_value = scan_type, - insert_if_not_found = True - ) - - # copy the NIfTI file to the LORIS BIDS import directory - file_path = self.copy_file_to_loris_bids_dir(nifti_file.path) - - # insert the file along with its information into files and parameter_file tables - echo_time = file_parameters['EchoTime'] if 'EchoTime' in file_parameters.keys() else None - echo_nb = file_parameters['EchoNumber'] if 'EchoNumber' in file_parameters.keys() else None - phase_enc_dir = file_parameters['PhaseEncodingDirection'] \ - if 'PhaseEncodingDirection' in file_parameters.keys() else None - file_info = { - 'FileType' : file_type, - 'File' : file_path, - 'SessionID' : self.session_id, - 'InsertedByUserID': getpass.getuser(), - 'CoordinateSpace' : coordinate_space, - 'OutputType' : output_type, - 'EchoTime' : echo_time, - 'PhaseEncodingDirection': phase_enc_dir, - 'EchoNumber' : echo_nb, - 'SourceFileID' : None, - 'MriScanTypeID' : scan_type_id - } - file_id = imaging.insert_imaging_file(file_info, file_parameters) - - # create the pic associated with the file - pic_rel_path = imaging.create_imaging_pic( - { - 'cand_id' : self.cand_id, - 
'data_dir_path': self.data_dir, - 'file_rel_path': file_path, - 'is_4D_dataset': is_4d_dataset, - 'file_id' : file_id - } - ) - if os.path.exists(os.path.join(self.data_dir, 'pic/', pic_rel_path)): - imaging.insert_parameter_file(file_id, 'check_pic_filename', pic_rel_path) - - return {'file_id': file_id, 'file_path': file_path} - - def copy_file_to_loris_bids_dir(self, file, derivatives_path=None): - """ - Wrapper around the utilities.copy_file function that copies the file - to the LORIS BIDS import directory and returns the relative path of the - file (without the data_dir part). - - :param file: full path to the original file - :type file: str - :param derivatives_path: path to the derivative folder - :type derivatives_path: str - - :return: relative path to the copied file - :rtype: str - """ - - # determine the path of the copied file - copy_file = self.loris_bids_mri_rel_dir - if self.bids_ses_id: - copy_file += os.path.basename(file) - else: - # make sure the ses- is included in the new filename if using - # default visit label from the LORIS config - copy_file += str.replace( - os.path.basename(file), - "sub-" + self.bids_sub_id, - "sub-" + self.bids_sub_id + "_ses-" + self.default_vl - ) - if derivatives_path: - # create derivative subject/vl/modality directory - lib.utilities.create_dir( - derivatives_path + self.loris_bids_mri_rel_dir, - self.verbose - ) - copy_file = derivatives_path + copy_file - else: - copy_file = self.loris_bids_root_dir + copy_file - - # copy the file - utilities.copy_file(file, copy_file, self.verbose) - - # determine the relative path and return it - relative_path = copy_file.replace(self.data_dir, "") - - return relative_path diff --git a/python/lib/scanstsv.py b/python/lib/scanstsv.py deleted file mode 100644 index 5cb938615..000000000 --- a/python/lib/scanstsv.py +++ /dev/null @@ -1,128 +0,0 @@ -"""Deals with sub-XXX_scans.tsv BIDS files""" - -import os - -from dateutil.parser import parse - -import lib -import lib.utilities 
as utilities - - -class ScansTSV: - """ - This class reads the BIDS sub-XXX_scans.tsv file that includes acquisition level information - such as scan date or age at scan... - - :Example: - from lib.scanstsv import ScansTSV - - scan_info = ScansTSV(scans_tsv_file, acquisition_file) - - acquisition_time = scan_info.get_acquisition_time() - age_at_scan = scan_info.get_age_at_scan - - """ - - def __init__(self, scans_tsv_file, acquisition_file, verbose): - """ - Constructor method for the ScansTSV class - - :param scans_tsv_file : path to the BIDS sub-XXX_scans.tsv file - :type scans_tsv_file : str - :param acquisition_file: path to the acquisition file (.nii, .set, .edf...) - :type acquisition_file: str - """ - - self.verbose = verbose - - # store files' paths - self.scans_tsv_file = scans_tsv_file - self.acquisition_file = acquisition_file - - # read the TSV file and store the header names and data - self.tsv_entries = utilities.read_tsv_file(self.scans_tsv_file) - self.tsv_headers = self.tsv_entries[0] - - # get the acquisition information for the acquisition file - self.acquisition_data = self.find_acquisition_data() - - def find_acquisition_data(self): - """ - Gets the information for the acquisition file from the TSV file. - - :return: the acquisition information found in the TSV file for the acquisition file - :rtype: list - """ - - for entry in self.tsv_entries: - if os.path.basename(self.acquisition_file) in entry['filename']: - return entry - - def get_acquisition_time(self): - """ - Get the acquisition time of the acquisition file. 
- - :return: acquisition time or None if not found - :rtype: str - """ - - if not self.acquisition_data: - # if no entry in self.acquisition_data, then no information available to get the acquisition time - return None - - if 'acq_time' in self.acquisition_data: - acq_time_list = [ele for ele in self.tsv_entries if ele['filename'] in self.acquisition_file] - if len(acq_time_list) == 1: - # the variable name could be mri_acq_time, but is eeg originally. - eeg_acq_time = acq_time_list[0]['acq_time'] - else: - print('More than one or no acquisition time has been found for ', self.acquisition_file) - exit() - - if eeg_acq_time == 'n/a': - return None - - try: - eeg_acq_time = parse(eeg_acq_time) - except ValueError as e: - message = "ERROR: could not convert acquisition time '" + \ - eeg_acq_time + \ - "' to datetime: " + str(e) - print(message) - exit(lib.exitcode.PROGRAM_EXECUTION_FAILURE) - return eeg_acq_time - - return None - - def get_age_at_scan(self): - """ - Get the age at the time of acquisition. 
- - :return: age at acquisition time - :rtype: str - """ - - # list of possible header names containing the age information - age_header_list = ['age', 'age_at_scan', 'age_acq_time'] - - for header_name in age_header_list: - if header_name in self.tsv_headers and self.acquisition_data: - return self.acquisition_data[header_name].strip() - - return None - - def copy_scans_tsv_file_to_loris_bids_dir(self, bids_sub_id, loris_bids_root_dir, data_dir): - - original_file_path = self.scans_tsv_file - final_file_path = loris_bids_root_dir + '/sub-' + bids_sub_id + '/' + os.path.basename(self.scans_tsv_file) - - # copy the scans.tsv file to the new directory - if os.path.exists(final_file_path): - lib.utilities.append_to_tsv_file(original_file_path, final_file_path, "filename", self.verbose) - else: - lib.utilities.copy_file(original_file_path, final_file_path, self.verbose) - - # determine the relative path and return it - relative_path = final_file_path.replace(data_dir, '') - - return relative_path diff --git a/python/lib/session.py b/python/lib/session.py deleted file mode 100644 index dd34aa954..000000000 --- a/python/lib/session.py +++ /dev/null @@ -1,228 +0,0 @@ -"""This class gather functions for session handling.""" - -from typing_extensions import deprecated - -from lib.database_lib.candidate_db import CandidateDB -from lib.database_lib.project_cohort_rel import ProjectCohortRel -from lib.database_lib.session_db import SessionDB -from lib.database_lib.site import Site - - -class Session: - """ - This class gather functions that interact with the database and allow session - creation or to fetch session information directly from the database. 
- - :Example: - - from lib.session import Session - from lib.database import Database - - # database connection - db = Database(config.mysql, verbose) - db.connect() - - session = Session( - verbose, cand_id, visit_label, - center_id, project_id, cohort_id - ) - - # grep session information from the database - loris_vl_info = session.get_session_info_from_loris(db) - - # insert the session into the database - loris_vl_info = session.create_session(db) - - # disconnect from the database - db.disconnect() - """ - - def __init__(self, db, verbose, cand_id=None, visit_label=None, - center_id=None, project_id=None, cohort_id=None): - """ - Constructor method for the Session class. - - :param verbose : whether to be verbose - :type verbose : bool - :param cand_id : candidate's CandID - :type cand_id : int - :param visit_label : visit label - :type visit_label : str - :param center_id : center ID to associate with the session - :type center_id : int - :param project_id : project ID to associate with the session - :type project_id : int - :param cohort_id: cohort ID to associate with the session - :type cohort_id: int - """ - self.db = db - self.verbose = verbose - - self.proj_cohort_rel_db_obj = ProjectCohortRel(db, verbose) - self.candidate_db_obj = CandidateDB(db, verbose) - self.session_db_obj = SessionDB(db, verbose) - self.site_db_obj = Site(db, verbose) - - self.cand_id = str(cand_id) - self.visit_label = visit_label - self.center_id = center_id - self.project_id = project_id - self.cohort_id = cohort_id - - self.proj_cohort_rel_info_dict = dict() - self.session_info_dict = dict() - self.session_id = None - - def create_session(self): - """ - Creates a session using BIDS information. - - :param db: database handler object - :type db: object - - :return: dictionary with session info from the session's table - :rtype: dict - """ - # TODO refactor bids_import pipeline to use same functions as dcm2bids below. 
To be done in different PR though - if self.verbose: - print("Creating visit " + self.visit_label - + " for CandID " + self.cand_id) - - # fetch the candidate.ID associated to the CandID first - candidate_id = self.candidate_db_obj.get_candidate_id(self.cand_id) - column_names = ('CandidateID', 'Visit_label', 'CenterID', 'Current_stage') - values = (candidate_id, self.visit_label, str(self.center_id), 'Not Started') - - if self.project_id: - column_names = (*column_names, 'ProjectID') - values = (*values, str(self.project_id)) - - if self.cohort_id: - column_names = (*column_names, 'CohortID') - values = (*values, str(self.cohort_id)) - - self.db.insert( - table_name='session', - column_names=column_names, - values=values - ) - - loris_session_info = self.get_session_info_from_loris() - - return loris_session_info - - def get_session_info_from_loris(self): - """ - Grep session information from the session table using CandID and - Visit_label. - - :param db: database handler object - :type db: object - - :return: dictionary with session info from the session's table - :rtype: dict - """ - # TODO refactor bids_import pipeline to use same functions as dcm2bids below. To be done in different PR though - loris_session_info = self.db.pselect( - """ - SELECT PSCID, CandID, session.* - FROM session - JOIN candidate ON (candidate.ID=session.CandidateID) - WHERE CandID = %s AND Visit_label = %s - """, - (self.cand_id, self.visit_label) - ) - - return loris_session_info[0] if loris_session_info else None - - @deprecated('Use `lib.db.queries.site.try_get_site_with_psc_id_visit_label` instead') - def get_session_center_info(self, pscid, visit_label): - """ - Get the session center information based on the PSCID and visit label of a session. 
- - :param pscid: candidate site ID (PSCID) - :type pscid: str - :param visit_label: visit label - :type visit_label: str - - :return: dictionary of site information for the visit/candidate queried - :rtype: dict - """ - return self.session_db_obj.get_session_center_info(pscid, visit_label) - - @deprecated('Use `lib.db.queries.try_get_candidate_with_cand_id_visit_label` instead') - def create_session_dict(self, cand_id, visit_label): - """ - Creates the session information dictionary based on a candidate ID and visit label. This will populate - self.session_info_dict based on the result returned from the database query. - - :param cand_id: CandID - :type cand_id: int - :param visit_label: Visit label of the session - :type visit_label: str - """ - self.session_info_dict = self.session_db_obj.create_session_dict(cand_id, visit_label) - if self.session_info_dict: - self.cand_id = self.session_info_dict['CandID'] - self.visit_label = self.session_info_dict['Visit_label'] - self.center_id = self.session_info_dict['CenterID'] - self.project_id = self.session_info_dict['ProjectID'] - self.cohort_id = self.session_info_dict['CohortID'] - self.session_id = self.session_info_dict['ID'] - - @deprecated('Use `lib.db.models.session.DbSession` instead') - def insert_into_session(self, session_info_to_insert_dict): - """ - Insert a new row in the session table using fields list as column names and values as values. 
- - :param session_info_to_insert_dict: dictionary with the column names and values to use for insertion - :type session_info_to_insert_dict: dict - - :return: ID of the new session registered - :rtype: int - """ - self.session_id = self.session_db_obj.insert_into_session( - fields=list(session_info_to_insert_dict.keys()), - values=list(session_info_to_insert_dict.values()) - ) - - return self.session_id - - @deprecated('Use `lib.get_subject_session.get_candidate_next_visit_number` instead') - def get_next_session_site_id_and_visit_number(self, cand_id): - """ - Determines the next session site and visit number based on the last session inserted for a given candidate. - - :param cand_id: candidate ID - :type cand_id: int - - :return: a dictionary with 'newVisitNo' and 'CenterID' keys/values - :rtype: dict - """ - return self.session_db_obj.determine_next_session_site_id_and_visit_number(cand_id) - - @deprecated('Use `lib.db.queries.site.get_all_sites` instead') - def get_list_of_sites(self): - """ - Get the list of sites available in the psc table. - - :return: list of sites - :rtype: list - """ - - return self.site_db_obj.get_list_of_sites() - - @deprecated('Use `lib.db.models.project_cohort.DbProjectCohort` instead') - def create_proj_cohort_rel_info_dict(self, project_id, cohort_id): - """ - Populate self.proj_cohort_rel_info_dict with the content returned from the database for the ProjectID and - CohortID. 
- - :param project_id: ID of the Project - :type project_id: int - :param cohort_id: ID of the Cohort - :type cohort_id: int - """ - self.proj_cohort_rel_info_dict = self.proj_cohort_rel_db_obj.create_proj_cohort_rel_dict( - project_id, cohort_id - ) diff --git a/python/lib/util/crypto.py b/python/lib/util/crypto.py index 72a790512..84c25910c 100644 --- a/python/lib/util/crypto.py +++ b/python/lib/util/crypto.py @@ -1,7 +1,8 @@ import hashlib +from pathlib import Path -def compute_file_blake2b_hash(file_path: str) -> str: +def compute_file_blake2b_hash(file_path: Path | str) -> str: """ Compute the BLAKE2b hash of a file. """ @@ -15,7 +16,7 @@ def compute_file_blake2b_hash(file_path: str) -> str: return hash.hexdigest() -def compute_file_md5_hash(file_path: str) -> str: +def compute_file_md5_hash(file_path: Path | str) -> str: """ Compute the MD5 hash of a file. """ diff --git a/python/lib/util/fs.py b/python/lib/util/fs.py index 126a7c217..4d56eb7ee 100644 --- a/python/lib/util/fs.py +++ b/python/lib/util/fs.py @@ -5,6 +5,7 @@ import tempfile from collections.abc import Iterator from datetime import datetime +from pathlib import Path import lib.exitcode from lib.env import Env @@ -80,37 +81,54 @@ def remove_empty_directories(dir_path: str): os.rmdir(subdir_path) -def get_file_extension(file_name: str) -> str: +def get_path_stem(path: Path) -> str: """ - Get the extension (including multiple extensions) of a file name or path without the leading - dot. + Get the stem of a path, that is, the name of the file without its extension (including multiple + extensions). """ - parts = file_name.split('.', maxsplit=1) + parts = path.name.split('.') + return parts[0] + + +def get_path_extension(path: Path) -> str: + """ + Get the extension (including multiple extensions) of a path without the leading dot. 
+ """ + + parts = path.name.split('.', maxsplit=1) if len(parts) == 1: return '' return parts[1] -def replace_file_extension(file_name: str, extension: str) -> str: +def remove_path_extension(path: Path) -> Path: + """ + Remove the extension (including multiple extensions) of a path. + """ + + parts = path.name.split('.') + return path.parent / parts[0] + + +def replace_path_extension(path: Path, extension: str) -> Path: """ - Replace the extension (including multiple extensions) of a file name or path by another - extension. + Replace the extension (including multiple extensions) of a path by another extension. """ - parts = file_name.split('.') - return f'{parts[0]}.{extension}' + parts = path.name.split('.') + return path.parent / f'{parts[0]}.{extension}' -def search_dir_file_with_regex(dir_path: str, regex: str) -> str | None: +def search_dir_file_with_regex(dir_path: Path, regex: str) -> Path | None: """ - Search for a file within a directory whose name matches a regular expression, or return `None` - if no such file is found. + Search for a file or directory within a directory whose name matches a regular expression, or + return `None` if no such file is found. 
""" - for file in os.scandir(dir_path): - if re.search(regex, file.name): - return file.name + for file_path in dir_path.iterdir(): + if re.search(regex, file_path.name): + return file_path return None diff --git a/python/scripts/bids_import.py b/python/scripts/bids_import.py deleted file mode 100755 index 3c43940c4..000000000 --- a/python/scripts/bids_import.py +++ /dev/null @@ -1,610 +0,0 @@ -#!/usr/bin/env python - -"""Script to import BIDS structure into LORIS.""" - -import getopt -import json -import os -import re -import sys - -import lib.exitcode -import lib.physiological -import lib.utilities -from lib.bidsreader import BidsReader -from lib.candidate import Candidate -from lib.config_file import load_config -from lib.database import Database -from lib.database_lib.config import Config -from lib.eeg import Eeg -from lib.mri import Mri -from lib.session import Session -from lib.util.crypto import compute_file_blake2b_hash - -sys.path.append('/home/user/python') - - -# to limit the traceback when raising exceptions. 
-# sys.tracebacklimit = 0 - -def main(): - bids_dir = '' - verbose = False - createcand = False - createvisit = False - idsvalidation = False - nobidsvalidation = False - type = None - profile = None - nocopy = False - - long_options = [ - "help", "profile=", "directory=", - "createcandidate", "createsession", "idsvalidation", - "nobidsvalidation", "nocopy", "type=", - "verbose" - ] - usage = ( - '\n' - 'usage : bids_import -d -p \n\n' - 'options: \n' - '\t-p, --profile : name of the python database config file in the config directory\n' - '\t-d, --directory : BIDS directory to parse & insert into LORIS\n' - 'If directory is within $data_dir/assembly_bids, no copy will be performed' - '\t-c, --createcandidate : to create BIDS candidates in LORIS (optional)\n' - '\t-s, --createsession : to create BIDS sessions in LORIS (optional)\n' - '\t-i, --idsvalidation : to validate BIDS directory for a matching pscid/candid pair (optional)\n' - '\t-b, --nobidsvalidation : to disable BIDS validation for BIDS compliance\n' - '\t-a, --nocopy : to disable dataset copy in data assembly_bids\n' - '\t-t, --type : raw | derivative. Specify the dataset type.' 
- 'If not set, the pipeline will look for both raw and derivative files.\n' - 'Required if no dataset_description.json is found.\n' - '\t-v, --verbose : be verbose\n' - ) - - try: - opts, _ = getopt.getopt(sys.argv[1:], 'hp:d:csinat:v', long_options) - except getopt.GetoptError: - print(usage) - sys.exit(lib.exitcode.GETOPT_FAILURE) - - for opt, arg in opts: - if opt in ('-h', '--help'): - print(usage) - sys.exit() - elif opt in ('-p', '--profile'): - profile = arg - elif opt in ('-d', '--directory'): - bids_dir = arg - elif opt in ('-v', '--verbose'): - verbose = True - elif opt in ('-c', '--createcandidate'): - createcand = True - elif opt in ('-s', '--createsession'): - createvisit = True - elif opt in ('-i', '--idsvalidation'): - idsvalidation = True - elif opt in ('-n', '--nobidsvalidation'): - nobidsvalidation = True - elif opt in ('-a', '--nocopy'): - nocopy = True - elif opt in ('-t', '--type'): - type = arg - - # input error checking and load config_file file - config_file = load_config(profile) - input_error_checking(bids_dir, usage) - - dataset_json = bids_dir + "/dataset_description.json" - if not os.path.isfile(dataset_json) and not type: - print('No dataset_description.json found. 
Please run with the --type option.') - print(usage) - sys.exit(lib.exitcode.MISSING_ARG) - - if type and type not in ('raw', 'derivative'): - print("--type must be one of 'raw', 'derivative'") - print(usage) - sys.exit(lib.exitcode.MISSING_ARG) - - # database connection - db = Database(config_file.mysql, verbose) - db.connect() - - config_obj = Config(db, verbose) - data_dir = config_obj.get_config('dataDirBasepath') - # making sure that there is a final / in data_dir - data_dir = data_dir if data_dir.endswith('/') else data_dir + "/" - - # read and insert BIDS data - read_and_insert_bids( - bids_dir, - data_dir, - verbose, - createcand, - createvisit, - idsvalidation, - nobidsvalidation, - type, - nocopy, - db - ) - - -def input_error_checking(bids_dir, usage): - """ - Checks whether the required inputs are set and that paths are valid. - - :param bids_dir: path to the BIDS directory to parse and insert into LORIS - :type bids_dir: str - :param usage : script usage to be displayed when encountering an error - :type usage : st - """ - - if not bids_dir: - message = '\n\tERROR: you must specify a BIDS directory using -d or ' \ - '--directory option' - print(message) - print(usage) - sys.exit(lib.exitcode.MISSING_ARG) - - if not os.path.isdir(bids_dir): - message = '\n\tERROR: you must specify a valid BIDS directory.\n' + \ - bids_dir + ' does not exist!' - print(message) - print(usage) - sys.exit(lib.exitcode.INVALID_PATH) - - -def read_and_insert_bids( - bids_dir, data_dir, verbose, createcand, createvisit, - idsvalidation, nobidsvalidation, type, nocopy, db -): - """ - Read the provided BIDS structure and import it into the database. 
- - :param bids_dir : path to the BIDS directory - :type bids_dir : str - :param data_dir : data_dir config value - :type data_dir : string - :param verbose : flag for more printing if set - :type verbose : bool - :param createcand : allow database candidate creation if it did not exist already - :type createcand : bool - :param createvisit : allow database visit creation if it did not exist already - :type createvisit : bool - :param idsvalidation : allow pscid/candid validation in the BIDS directory name - :type idsvalidation : bool - :param nobidsvalidation : disable bids dataset validation - :type nobidsvalidation : bool - :param type : raw | derivative. Type of the dataset - :type type : string - :param nocopy : disable bids dataset copy in assembly_bids - :type nocopy : bool - :param db : db object - :type db : object - - """ - - # grep config settings from the Config module - config_obj = Config(db, verbose) - default_bids_vl = config_obj.get_config('default_bids_vl') - - # Validate that pscid and candid matches - if idsvalidation: - validateids(bids_dir, db, verbose) - - # load the BIDS directory - if nobidsvalidation: - bids_reader = BidsReader(bids_dir, verbose, False) - else: - bids_reader = BidsReader(bids_dir, verbose) - if not bids_reader.participants_info \ - or not bids_reader.cand_sessions_list \ - or not bids_reader.cand_session_modalities_list: - message = '\n\tERROR: could not properly parse the following' \ - 'BIDS directory:' + bids_dir + '\n' - print(message) - sys.exit(lib.exitcode.UNREADABLE_FILE) - - loris_bids_root_dir = None - if not nocopy: - # create the LORIS_BIDS directory in data_dir based on Name and BIDS version - loris_bids_root_dir = create_loris_bids_directory( - bids_reader, data_dir, verbose - ) - - # Assumption all same project (for project-wide tags) - single_project_id = None - - # loop through subjects - for bids_subject_info in bids_reader.participants_info: - - # greps BIDS information for the candidate - bids_id = 
bids_subject_info['participant_id'] - bids_sessions = bids_reader.cand_sessions_list[bids_id] - - # greps BIDS candidate's info from LORIS (creates the candidate if it - # does not exist yet in LORIS and the createcand flag is set to true) - loris_cand_info = grep_or_create_candidate_db_info( - bids_reader, bids_id, db, createcand, verbose - ) - - if not nocopy: - # create the candidate's directory in the LORIS BIDS import directory - lib.utilities.create_dir(loris_bids_root_dir + "sub-" + bids_id, verbose) - - cand_id = loris_cand_info['CandID'] - center_id = loris_cand_info['RegistrationCenterID'] - project_id = loris_cand_info['RegistrationProjectID'] - single_project_id = project_id - - cohort_id = None - # TODO: change subproject -> cohort in participants.tsv? - if 'subproject' in bids_subject_info: - # TODO: change subproject -> cohort in participants.tsv? - cohort = bids_subject_info['subproject'] - cohort_info = db.pselect( - "SELECT CohortID FROM cohort WHERE title = %s", - [cohort, ] - ) - if len(cohort_info) > 0: - cohort_id = cohort_info[0]['CohortID'] - - # greps BIDS session's info for the candidate from LORIS (creates the - # session if it does not exist yet in LORIS and the createvisit is set - # to true. 
If no visit in BIDS structure, then use default visit_label - # stored in the Config module) - grep_candidate_sessions_info( - bids_sessions, bids_id, cand_id, loris_bids_root_dir, - createvisit, verbose, db, default_bids_vl, - center_id, project_id, cohort_id, nocopy - ) - - # Import root-level (dataset-wide) events.json - # Assumption: Single project for project-wide tags - bids_layout = bids_reader.bids_layout - root_event_metadata_file = bids_layout.get_nearest( - bids_dir, - return_type='tuple', - strict=False, - extension='json', - suffix='events', - all_=False, - subject=None, - session=None - ) - - dataset_tag_dict = {} - if not root_event_metadata_file: - message = '\nWARNING: no events metadata files (events.json) in ' \ - 'root directory' - print(message) - else: - # copy the event file to the LORIS BIDS import directory - copy_file = str.replace( - root_event_metadata_file.path, - bids_layout.root, - "" - ).lstrip('/') - - if not nocopy: - event_metadata_path = loris_bids_root_dir + copy_file - lib.utilities.copy_file(root_event_metadata_file.path, event_metadata_path, verbose) - - # TODO: Move - hed_query = 'SELECT * FROM hed_schema_nodes WHERE 1' - hed_union = db.pselect(query=hed_query, args=()) - - # load json data - with open(root_event_metadata_file.path) as metadata_file: - event_metadata = json.load(metadata_file) - blake2 = compute_file_blake2b_hash(root_event_metadata_file.path) - physio = lib.physiological.Physiological(db, verbose) - _, dataset_tag_dict = physio.insert_event_metadata( - event_metadata=event_metadata, - event_metadata_file=event_metadata_path, - physiological_file_id=None, - project_id=single_project_id, - blake2=blake2, - project_wide=True, - hed_union=hed_union - ) - - # read list of modalities per session / candidate and register data - for row in bids_reader.cand_session_modalities_list: - bids_session = row['bids_ses_id'] - visit_label = bids_session if bids_session else default_bids_vl - loris_bids_visit_rel_dir = 
'sub-' + row['bids_sub_id'] + '/' + 'ses-' + visit_label - - for modality in row['modalities']: - loris_bids_modality_rel_dir = loris_bids_visit_rel_dir + '/' + modality + '/' - if not nocopy: - lib.utilities.create_dir(loris_bids_root_dir + loris_bids_modality_rel_dir, verbose) - - if modality == 'eeg' or modality == 'ieeg': - Eeg( - bids_reader = bids_reader, - bids_sub_id = row['bids_sub_id'], - bids_ses_id = row['bids_ses_id'], - bids_modality = modality, - db = db, - verbose = verbose, - data_dir = data_dir, - default_visit_label = default_bids_vl, - loris_bids_eeg_rel_dir = loris_bids_modality_rel_dir, - loris_bids_root_dir = loris_bids_root_dir, - dataset_tag_dict = dataset_tag_dict, - dataset_type = type - ) - - elif modality in ['anat', 'dwi', 'fmap', 'func']: - Mri( - bids_reader = bids_reader, - bids_sub_id = row['bids_sub_id'], - bids_ses_id = row['bids_ses_id'], - bids_modality = modality, - db = db, - verbose = verbose, - data_dir = data_dir, - default_visit_label = default_bids_vl, - loris_bids_mri_rel_dir = loris_bids_modality_rel_dir, - loris_bids_root_dir = loris_bids_root_dir - ) - - # disconnect from the database - db.disconnect() - - -def validateids(bids_dir, db, verbose): - """ - Validate that pscid and candid matches - - :param bids_dir : path to the BIDS directory - :type bids_dir : str - :param db : database handler object - :type db : object - :param verbose : flag for more printing if set - :type verbose : bool - """ - - bids_folder = bids_dir.rstrip('/').split('/')[-1] - bids_folder_parts = bids_folder.split('_') - psc_id = bids_folder_parts[0] - cand_id = bids_folder_parts[1] - - candidate = Candidate(verbose, cand_id=cand_id) - loris_cand_info = candidate.get_candidate_info_from_loris(db) - - if not loris_cand_info: - print("ERROR: could not find a candidate with cand_id " + cand_id + ".") - sys.exit(lib.exitcode.CANDID_NOT_FOUND) - if loris_cand_info['PSCID'] != psc_id: - print("ERROR: cand_id " + cand_id + " and psc_id " + psc_id + 
" do not match.") - sys.exit(lib.exitcode.CANDIDATE_MISMATCH) - - -def create_loris_bids_directory(bids_reader, data_dir, verbose): - """ - Creates the LORIS BIDS import root directory (with name and BIDS version) - and copy over the dataset_description.json, README and participants.tsv - files. - - :param bids_reader: BIDS information handler object - :type bids_reader: object - :param data_dir : path of the LORIS data directory - :type data_dir : str - :param verbose : if true, prints out information while executing - :type verbose : bool - - :return: path to the LORIS BIDS import root directory - :rtype: str - """ - - # making sure that there is a final / in bids_dir - bids_dir = bids_reader.bids_dir - bids_dir = bids_dir if bids_dir.endswith('/') else bids_dir + "/" - - # determine the root directory of the LORIS BIDS and create it if does not exist - name = re.sub(r"[^0-9a-zA-Z]+", "_", bids_reader.dataset_name) # get name of the dataset - version = re.sub(r"[^0-9a-zA-Z\.]+", "_", bids_reader.bids_version) # get BIDSVersion of the dataset - - # the LORIS BIDS directory will be in data_dir/BIDS/ and named with the - # concatenation of the dataset name and the BIDS version - loris_bids_dirname = lib.utilities.create_dir( - data_dir + "bids_imports/" + name + "_BIDSVersion_" + version + "/", - verbose - ) - - # copy the dataset JSON file to the new directory - lib.utilities.copy_file( - bids_dir + "dataset_description.json", - loris_bids_dirname + "dataset_description.json", - verbose - ) - - # copy the README file to the new directory - if os.path.isfile(bids_dir + "README"): - lib.utilities.copy_file( - bids_dir + "README", - loris_bids_dirname + "README", - verbose - ) - - # copy the participant.tsv file to the new directory - if os.path.exists(loris_bids_dirname + "participants.tsv"): - lib.utilities.append_to_tsv_file( - bids_dir + "participants.tsv", - loris_bids_dirname + "participants.tsv", - "participant_id", - verbose - ) - else: - 
lib.utilities.copy_file( - bids_dir + "participants.tsv", - loris_bids_dirname + "participants.tsv", - verbose - ) - - return loris_bids_dirname - - -def grep_or_create_candidate_db_info(bids_reader, bids_id, db, createcand, verbose): - """ - Greps (or creates if candidate does not exist and createcand is true) the - BIDS candidate in the LORIS candidate's table and return a list of - candidates with their related fields from the database. - - :param bids_reader : BIDS information handler object - :type bids_reader : object - :param bids_id : bids_id to be used (CandID or PSCID) - :type bids_id : str - :param db : database handler object - :type db : object - :param createcand : if true, creates the candidate in LORIS - :type createcand : bool - :param verbose : if true, prints out information while executing - :type verbose : bool - - :return: list of candidate's dictionaries. One entry in the list holds - a dictionary with field's values from the candidate table - :rtype: list - """ - - candidate = Candidate(verbose=verbose, cand_id=bids_id) - loris_cand_info = candidate.get_candidate_info_from_loris(db) - - if not loris_cand_info: - candidate = Candidate(verbose, psc_id=bids_id) - loris_cand_info = candidate.get_candidate_info_from_loris(db) - - if not loris_cand_info and createcand: - loris_cand_info = candidate.create_candidate( - db, bids_reader.participants_info - ) - if not loris_cand_info: - print("Creating candidate failed. Cannot importing the files.\n") - sys.exit(lib.exitcode.CANDIDATE_CREATION_FAILURE) - - if not loris_cand_info: - print("Candidate " + bids_id + " not found. 
You can retry with the --createcandidate option.\n") - sys.exit(lib.exitcode.CANDIDATE_NOT_FOUND) - - return loris_cand_info - - -def grep_or_create_session_db_info( - bids_id, cand_id, visit_label, - db, createvisit, verbose, loris_bids_dir, - center_id, project_id, cohort_id, nocopy): - """ - Greps (or creates if session does not exist and createvisit is true) the - BIDS session in the LORIS session's table and return a list of - sessions with their related fields from the database. - - :parma bids_id : BIDS ID of the session - :type bids_id : str - :param cand_id : CandID to use to create the session - :type cand_id : int - :param visit_label : Visit label to use to create the session - :type visit_label : str - :param db : database handler object - :type db : object - :param createvisit : if true, creates the session in LORIS - :type createvisit : bool - :param verbose : if true, prints out information while executing - :type verbose : bool - :param loris_bids_dir: LORIS BIDS import root directory to copy data - :type loris_bids_dir: str - :param center_id : CenterID to use to create the session - :type center_id : int - :param project_id : ProjectID to use to create the session - :type project_id : int - :param cohort_id : CohortID to use to create the session - :type cohort_id : int - :param nocopy : if true, skip the assembly_bids dataset copy - :type nocopy : bool - - :return: session information grepped from LORIS for cand_id and visit_label - :rtype: dict - """ - - session = Session(db, verbose, cand_id, visit_label, center_id, project_id, cohort_id) - loris_vl_info = session.get_session_info_from_loris() - - if not loris_vl_info and createvisit: - loris_vl_info = session.create_session() - - if not nocopy: - # create the visit directory for in the candidate folder of the LORIS - # BIDS import directory - lib.utilities.create_dir( - loris_bids_dir + "sub-" + bids_id + "/ses-" + visit_label, - verbose - ) - - return loris_vl_info - - -def 
grep_candidate_sessions_info(bids_ses, bids_id, cand_id, loris_bids_dir, - createvisit, verbose, db, default_vl, - center_id, project_id, cohort_id, nocopy): - """ - Greps all session info dictionaries for a given candidate and aggregates - them into a list, with one entry per session. If the session does not - exist in LORIS and that createvisit is true, it will create the session - first. - - :param bids_ses : list of BIDS sessions to grep info or insert - :type bids_ses : list - :param bids_id : BIDS ID of the candidate - :type bids_id : str - :param cand_id : candidate's CandID - :type cand_id : int - :param loris_bids_dir: LORIS BIDS import root directory to copy data - :type loris_bids_dir: str - :param createvisit : if true, creates the visits in LORIS - :type createvisit : bool - :param verbose : if true, prints out information while executing - :type verbose : bool - :param db : database handler object - :type db : object - :param default_vl : default visit label from the Config module - :type default_vl : str - :param center_id : center ID associated to the candidate and visit - :type center_id : int - :param project_id : project ID associated to the candidate and visit - :type project_id : int - :param cohort_id : cohort ID associated to the candidate and visit - :type cohort_id : int - :param nocopy : if true, skip the assembly_bids dataset copy - :type nocopy : bool - - - - :return: list of all session's dictionaries for a given candidate - :rtype: list - """ - - loris_sessions_info = [] - - if not bids_ses: - loris_ses_info = grep_or_create_session_db_info( - bids_id, cand_id, default_vl, db, - createvisit, verbose, loris_bids_dir, - center_id, project_id, cohort_id, nocopy - ) - loris_sessions_info.append(loris_ses_info) - else: - for visit_label in bids_ses: - loris_ses_info = grep_or_create_session_db_info( - bids_id, cand_id, visit_label, db, - createvisit, verbose, loris_bids_dir, - center_id, project_id, cohort_id, nocopy - ) - 
loris_sessions_info.append(loris_ses_info) - - return loris_sessions_info - - -if __name__ == "__main__": - main() diff --git a/python/scripts/import_bids_dataset.py b/python/scripts/import_bids_dataset.py new file mode 100755 index 000000000..7267f1608 --- /dev/null +++ b/python/scripts/import_bids_dataset.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python + +"""Script to import BIDS structure into LORIS.""" + +import os +from typing import Any + +import lib.exitcode +from lib.import_bids_dataset.args import Args +from lib.import_bids_dataset.main import import_bids_dataset +from lib.logging import log_error_exit +from lib.lorisgetopt import LorisGetOpt +from lib.make_env import make_env + + +def pack_args(options_dict: dict[str, Any]) -> Args: + return Args( + source_bids_path = os.path.normpath(options_dict['directory']['value']), + type = options_dict['type']['value'], + bids_validation = not options_dict['nobidsvalidation']['value'], + create_candidate = options_dict['createcandidate']['value'], + create_session = options_dict['createsession']['value'], + copy = not options_dict['nocopy']['value'], + verbose = options_dict['verbose']['value'], + ) + + +# to limit the traceback when raising exceptions. +# sys.tracebacklimit = 0 + +def main(): + usage = ( + "\n" + "usage : bids_import -d -p \n" + "\n" + "options: \n" + "\t-p, --profile : name of the python database config file in dicom-archive/.loris-mri\n" + "\t-d, --directory : BIDS directory to parse & insert into LORIS\n" + "\t If directory is within $data_dir/assembly_bids, no copy will be performed\n" + "\t-c, --createcandidate : to create BIDS candidates in LORIS (optional)\n" + "\t-s, --createsession : to create BIDS sessions in LORIS (optional)\n" + "\t-b, --nobidsvalidation : to disable BIDS validation for BIDS compliance\n" + "\t-a, --nocopy : to disable dataset copy in data assembly_bids\n" + "\t-t, --type : raw | derivative. 
Specify the dataset type.\n" + "\t If not set, the pipeline will look for both raw and derivative files.\n" + "\t Required if no dataset_description.json is found.\n" + "\t-v, --verbose : be verbose\n" + ) + + options_dict = { + "profile": { + "value": None, "required": False, "expect_arg": True, "short_opt": "p", "is_path": False + }, + "directory": { + "value": None, "required": True, "expect_arg": True, "short_opt": "d", "is_path": True + }, + "createcandidate": { + "value": False, "required": False, "expect_arg": False, "short_opt": "c", "is_path": False + }, + "createsession": { + "value": False, "required": False, "expect_arg": False, "short_opt": "s", "is_path": False + }, + "nobidsvalidation": { + "value": False, "required": False, "expect_arg": False, "short_opt": "b", "is_path": False + }, + "nocopy": { + "value": False, "required": False, "expect_arg": False, "short_opt": "a", "is_path": False + }, + "type": { + "value": None, "required": False, "expect_arg": True, "short_opt": "t", "is_path": False + }, + "verbose": { + "value": False, "required": False, "expect_arg": False, "short_opt": "v", "is_path": False + }, + "help": { + "value": False, "required": False, "expect_arg": False, "short_opt": "h", "is_path": False + }, + } + + # Get the CLI arguments and initiate the environment. + + loris_getopt_obj = LorisGetOpt(usage, options_dict, os.path.basename(__file__[:-3])) + + env = make_env(loris_getopt_obj) + + # Check the CLI arguments. + + type = loris_getopt_obj.options_dict['type']['value'] + if type not in (None, 'raw', 'derivative'): + log_error_exit( + env, + f"--type must be one of 'raw', 'derivative'\n{usage}", + lib.exitcode.MISSING_ARG, + ) + + args = pack_args(loris_getopt_obj.options_dict) + + # read and insert BIDS data + import_bids_dataset( + env, + args, + loris_getopt_obj.db, + ) + + print("Success !") + + +if __name__ == '__main__': + main()