Skip to content

Commit 3e7f86f

Browse files
Update merge behavior to avoid overwriting scan and participants files
- Modified code to ensure scan and participants files are not overwritten.
- Introduced an option to preserve either the old or the new information when conflicts arise.
1 parent c1c4fef commit 3e7f86f

1 file changed

Lines changed: 82 additions & 10 deletions

File tree

snirf2bids/snirf2bids.py

Lines changed: 82 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from warnings import warn
1010

1111
import numpy as np
12+
import pandas as pd
1213
import importlib_resources
1314
from pysnirf2 import Snirf, SnirfFormatError
1415

@@ -1260,10 +1261,10 @@ def export_to_dict(self):
12601261
sidecarname = fnames['events'].replace('.tsv', '.json')
12611262
export[sidecarname] = json.dumps(self.events.sidecar)
12621263

1263-
# participant.tsv ; participants created on front end. This won't be needed. Commenting just in case we need this back
1264-
# fields = self.participants
1265-
# text = _tsv_to_json(fields)
1266-
# export[_make_filename_prefix(self.entities) + '_participants.tsv'] = text
1264+
# participant.tsv
1265+
fields = self.participants
1266+
text = _tsv_to_json(fields)
1267+
export['participants.tsv'] = text
12671268

12681269
# scans.tsv
12691270
fields = self.scans
@@ -1272,7 +1273,7 @@ def export_to_dict(self):
12721273

12731274
return export
12741275

1275-
def snirf2bids(path_to_snirf: str, outputpath: str = None, list_files=[]) -> str:
1276+
def snirf2bids(path_to_snirf: str, outputpath: str = None, list_files=[], retain_old_info=True) -> str:
12761277
"""Creates BIDS metadata text files from a SNIRF file
12771278
12781279
Args:
@@ -1291,13 +1292,84 @@ def snirf2bids(path_to_snirf: str, outputpath: str = None, list_files=[]) -> str
12911292
with open(os.path.join(outputpath, item), 'w', newline='') as f:
12921293
f.write(s[item])
12931294
elif item.endswith('_scans.tsv'):
1294-
with open(os.path.join(os.path.dirname(outputpath), item), 'w', newline='') as f:
1295-
f.write(s[item])
1295+
file_path = os.path.join(os.path.dirname(outputpath), item)
1296+
# print(s[item])
1297+
if not os.path.isfile(file_path):
1298+
with open(os.path.join(file_path), 'w', newline='') as f:
1299+
f.write(s[item])
1300+
else:
1301+
temp_file_path = os.path.join(os.path.dirname(outputpath), 'temp_'+item)
1302+
with open(os.path.join(temp_file_path), 'w', newline='') as f:
1303+
f.write(s[item])
1304+
df1 = pd.read_csv(file_path, sep='\t')
1305+
df2 = pd.read_csv(temp_file_path, sep='\t')
1306+
merged_df = pd.merge(df1, df2, on='filename', how='outer', suffixes=('_df1', '_df2'))
1307+
1308+
# Identify conflicting columns dynamically (columns with both '_df1' and '_df2' suffixes)
1309+
conflicting_columns = [col.split('_df1')[0] for col in merged_df.columns if col.endswith('_df1')]
1310+
1311+
# Resolve conflicts
1312+
if retain_old_info:
1313+
for col in conflicting_columns:
1314+
merged_df[col] = merged_df[f'{col}_df1'].combine_first(merged_df[f'{col}_df2'])
1315+
else:
1316+
for col in conflicting_columns:
1317+
merged_df[col] = merged_df[f'{col}_df2'].combine_first(merged_df[f'{col}_df1'])
1318+
1319+
# Drop unnecessary columns created by the merge
1320+
columns_to_drop = [f'{col}_df1' for col in conflicting_columns] + [f'{col}_df2' for col in conflicting_columns]
1321+
merged_df = merged_df.drop(columns=columns_to_drop)
1322+
1323+
merged_df = merged_df.fillna(' ')
1324+
merged_df.to_csv(file_path, sep='\t', index=False)
1325+
elif item.endswith('participants.tsv'):
1326+
sub_index = outputpath.find('sub-')
1327+
participants_path = os.path.join(outputpath[:sub_index],item)
1328+
if not os.path.isfile(participants_path):
1329+
with open(participants_path, 'w', newline='') as f:
1330+
f.write(s[item])
1331+
else:
1332+
temp_file_path = os.path.join(outputpath[:sub_index], 'temp_'+item)
1333+
with open(os.path.join(temp_file_path), 'w', newline='') as f:
1334+
f.write(s[item])
1335+
1336+
df1 = pd.read_csv(participants_path, sep='\t')
1337+
# Save the original column order of df1
1338+
original_order = df1.columns.tolist()
1339+
df2 = pd.read_csv(temp_file_path, sep='\t')
1340+
merged_df = pd.merge(df1, df2, on='participant_id', how='outer', suffixes=('_df1', '_df2'))
1341+
1342+
# Identify conflicting columns dynamically (columns with both '_df1' and '_df2' suffixes)
1343+
conflicting_columns = [col.split('_df1')[0] for col in merged_df.columns if col.endswith('_df1')]
1344+
1345+
# Resolve conflicts
1346+
if retain_old_info:
1347+
for col in conflicting_columns:
1348+
merged_df[col] = merged_df[f'{col}_df1'].combine_first(merged_df[f'{col}_df2'])
1349+
else:
1350+
for col in conflicting_columns:
1351+
merged_df[col] = merged_df[f'{col}_df2'].combine_first(merged_df[f'{col}_df1'])
1352+
1353+
# Drop unnecessary columns created by the merge
1354+
columns_to_drop = [f'{col}_df1' for col in conflicting_columns] + [f'{col}_df2' for col in conflicting_columns]
1355+
merged_df = merged_df.drop(columns=columns_to_drop)
1356+
1357+
# Add new columns from df2 that are not in df1
1358+
for col in df2.columns:
1359+
if col not in original_order:
1360+
original_order.append(col)
1361+
1362+
# Reorder the columns
1363+
merged_df = merged_df[original_order]
1364+
1365+
merged_df = merged_df.fillna(' ')
1366+
merged_df.to_csv(participants_path, sep='\t', index=False)
1367+
12961368
elif item.endswith('.tsv'):
12971369
with open(os.path.join(outputpath, item), 'w', newline='') as f:
12981370
f.write(s[item])
12991371

1300-
def snirf2bids_recurse(fpath: str, list_files=[]) -> str:
1372+
def snirf2bids_recurse(fpath: str, list_files=[], retain_old_info=True) -> str:
13011373
"""
13021374
Generates BIDS metadata text files from a SNIRF file or directory recursively.
13031375
@@ -1313,10 +1385,10 @@ def snirf2bids_recurse(fpath: str, list_files=[]) -> str:
13131385

13141386
if os.path.isdir(fpath):
13151387
for f in os.listdir(fpath):
1316-
snirf2bids_recurse(os.path.join(fpath, f), list_files=list_files)
1388+
snirf2bids_recurse(os.path.join(fpath, f), list_files=list_files, retain_old_info=retain_old_info)
13171389
elif os.path.isfile(fpath):
13181390
if fpath.endswith('.snirf'):
1319-
snirf2bids(fpath, list_files=list_files)
1391+
snirf2bids(fpath, list_files=list_files, retain_old_info=retain_old_info)
13201392

13211393
def snirf2json(path_to_snirf: str) -> str:
13221394
run = SnirfRun(fpath=path_to_snirf)

0 commit comments

Comments
 (0)