Skip to content

Commit 84942ce

Browse files
committed
fixes issue-511 and incorporates code from @ptth222 from PR #553
1 parent db3c4a9 commit 84942ce

File tree

13 files changed

+452
-65
lines changed

13 files changed

+452
-65
lines changed

isatools/constants.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@
6868

6969
QUALIFIER_LABELS = [
7070
'Protocol REF',
71+
'Performer',
72+
'Date',
7173
'Material Type',
7274
'Term Source REF',
7375
'Term Accession Number',
@@ -80,6 +82,8 @@
8082

8183
ALL_LABELS.append('Protocol REF')
8284
ALL_LABELS.append('Label')
85+
ALL_LABELS.append('Performer')
86+
ALL_LABELS.append('Date')
8387

8488
_LABELS_ASSAY_NODES = [
8589
'Assay Name',

isatools/isatab/dump/write.py

Lines changed: 80 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
)
1717
from isatools.isatab.defaults import log
1818
from isatools.isatab.graph import _all_end_to_end_paths, _longest_path_and_attrs
19+
from isatools.model.utils import _build_paths_and_indexes
1920
from isatools.isatab.utils import (
2021
get_comment_column,
2122
get_pv_columns,
@@ -260,24 +261,24 @@ def flatten(current_list):
260261

261262
columns = []
262263

263-
# start_nodes, end_nodes = _get_start_end_nodes(a_graph)
264-
paths = _all_end_to_end_paths(
265-
a_graph, [x for x in a_graph.nodes()
266-
if isinstance(a_graph.indexes[x], Sample)])
264+
paths, indexes = _build_paths_and_indexes(assay_obj.process_sequence)
265+
267266
if len(paths) == 0:
268267
log.info("No paths found, skipping writing assay file")
269268
continue
270-
if _longest_path_and_attrs(paths, a_graph.indexes) is None:
269+
if _longest_path_and_attrs(paths, indexes) is None:
271270
raise IOError(
272271
"Could not find any valid end-to-end paths in assay graph")
273272

274273
protocol_in_path_count = 0
275-
for node_index in _longest_path_and_attrs(paths, a_graph.indexes):
276-
node = a_graph.indexes[node_index]
274+
output_label_in_path_counts = {}
275+
name_label_in_path_counts = {}
276+
header_count: dict[str, int] = {}
277+
278+
for node_index in _longest_path_and_attrs(paths, indexes):
279+
node = indexes[node_index]
277280
if isinstance(node, Sample):
278281
olabel = "Sample Name"
279-
# olabel = "Sample Name.{}".format(sample_in_path_count)
280-
# sample_in_path_count += 1
281282
columns.append(olabel)
282283
columns += flatten(
283284
map(lambda x: get_comment_column(olabel, x),
@@ -305,30 +306,25 @@ def flatten(current_list):
305306
protocol_type = node.executes_protocol.protocol_type.term.lower()
306307
else:
307308
protocol_type = node.executes_protocol.protocol_type.lower()
308-
309-
if protocol_type in protocol_types_dict and\
310-
protocol_types_dict[protocol_type][HEADER]:
309+
310+
if protocol_type in protocol_types_dict and protocol_types_dict[protocol_type][HEADER]:
311311
oname_label = protocol_types_dict[protocol_type][HEADER]
312-
else:
313-
oname_label = None
314-
315-
if oname_label is not None:
316-
columns.append(oname_label)
317-
318-
if node.executes_protocol.protocol_type.term.lower() in \
319-
protocol_types_dict["nucleic acid hybridization"][SYNONYMS]:
320-
columns.append("Array Design REF")
321-
312+
313+
if oname_label not in name_label_in_path_counts:
314+
name_label_in_path_counts[oname_label] = 0
315+
header_count[oname_label] = 0
316+
new_oname_label = oname_label + "." + str(name_label_in_path_counts[oname_label])
317+
318+
columns.append(new_oname_label)
319+
name_label_in_path_counts[oname_label] += 1
320+
321+
if protocol_type in protocol_types_dict["nucleic acid hybridization"][SYNONYMS]:
322+
columns.extend(["Array Design REF"])
323+
322324
columns += flatten(
323325
map(lambda x: get_comment_column(olabel, x),
324326
node.comments))
325-
326-
for output in [x for x in node.outputs if isinstance(x, DataFile)]:
327-
if output.label not in columns:
328-
columns.append(output.label)
329-
columns += flatten(
330-
map(lambda x: get_comment_column(output.label, x),
331-
output.comments))
327+
print(columns)
332328
elif isinstance(node, Material):
333329
olabel = node.type
334330
columns.append(olabel)
@@ -340,7 +336,17 @@ def flatten(current_list):
340336
node.comments))
341337

342338
elif isinstance(node, DataFile):
343-
pass # handled in process
339+
# pass # handled in process
340+
output_label = node.label
341+
if output_label not in output_label_in_path_counts:
342+
output_label_in_path_counts[output_label] = 0
343+
new_output_label = output_label + "." + str(output_label_in_path_counts[output_label])
344+
345+
columns.append(new_output_label)
346+
output_label_in_path_counts[output_label] += 1
347+
columns += flatten(
348+
map(lambda x: get_comment_column(new_output_label, x),
349+
node.comments))
344350

345351
omap = get_object_column_map(columns, columns)
346352

@@ -355,8 +361,10 @@ def pbar(x):
355361
df_dict[k].extend([""])
356362

357363
protocol_in_path_count = 0
364+
output_label_in_path_counts = {}
365+
name_label_in_path_counts = {}
358366
for node_index in path_:
359-
node = a_graph.indexes[node_index]
367+
node = indexes[node_index]
360368
if isinstance(node, Process):
361369
olabel = "Protocol REF.{}".format(protocol_in_path_count)
362370
protocol_in_path_count += 1
@@ -366,20 +374,20 @@ def pbar(x):
366374
protocol_type = node.executes_protocol.protocol_type.term.lower()
367375
else:
368376
protocol_type = node.executes_protocol.protocol_type.lower()
369-
370-
if protocol_type in protocol_types_dict and\
371-
protocol_types_dict[protocol_type][HEADER]:
377+
378+
if protocol_type in protocol_types_dict and protocol_types_dict[protocol_type][HEADER]:
372379
oname_label = protocol_types_dict[protocol_type][HEADER]
373-
else:
374-
oname_label = None
375-
376-
if oname_label is not None:
377-
df_dict[oname_label][-1] = node.name
378380

379-
if node.executes_protocol.protocol_type.term.lower() in \
380-
protocol_types_dict["nucleic acid hybridization"][SYNONYMS]:
381+
if oname_label not in name_label_in_path_counts:
382+
name_label_in_path_counts[oname_label] = 0
383+
384+
new_oname_label = oname_label + "." + str(name_label_in_path_counts[oname_label])
385+
df_dict[new_oname_label][-1] = node.name
386+
name_label_in_path_counts[oname_label] += 1
387+
388+
if protocol_type in protocol_types_dict["nucleic acid hybridization"][SYNONYMS]:
381389
df_dict["Array Design REF"][-1] = node.array_design_ref
382-
390+
383391
if node.date is not None:
384392
df_dict[olabel + ".Date"][-1] = node.date
385393
if node.performer is not None:
@@ -391,18 +399,18 @@ def pbar(x):
391399
colabel = "{0}.Comment[{1}]".format(olabel, co.name)
392400
df_dict[colabel][-1] = co.value
393401

394-
for output in [x for x in node.outputs if isinstance(x, DataFile)]:
395-
output_by_type = []
396-
delim = ";"
397-
olabel = output.label
398-
if output.label not in columns:
399-
columns.append(output.label)
400-
output_by_type.append(output.filename)
401-
df_dict[olabel][-1] = delim.join(map(str, output_by_type))
402-
403-
for co in output.comments:
404-
colabel = "{0}.Comment[{1}]".format(olabel, co.name)
405-
df_dict[colabel][-1] = co.value
402+
# for output in [x for x in node.outputs if isinstance(x, DataFile)]:
403+
# output_by_type = []
404+
# delim = ";"
405+
# olabel = output.label
406+
# if output.label not in columns:
407+
# columns.append(output.label)
408+
# output_by_type.append(output.filename)
409+
# df_dict[olabel][-1] = delim.join(map(str, output_by_type))
410+
#
411+
# for co in output.comments:
412+
# colabel = "{0}.Comment[{1}]".format(olabel, co.name)
413+
# df_dict[colabel][-1] = co.value
406414

407415
elif isinstance(node, Sample):
408416
olabel = "Sample Name"
@@ -434,7 +442,19 @@ def pbar(x):
434442
df_dict[colabel][-1] = co.value
435443

436444
elif isinstance(node, DataFile):
437-
pass # handled in process
445+
# pass # handled in process
446+
447+
output_label = node.label
448+
if output_label not in output_label_in_path_counts:
449+
output_label_in_path_counts[output_label] = 0
450+
new_output_label = output_label + "." + str(output_label_in_path_counts[output_label])
451+
df_dict[new_output_label][-1] = node.filename
452+
output_label_in_path_counts[output_label] += 1
453+
454+
for co in node.comments:
455+
colabel = "{0}.Comment[{1}]".format(
456+
new_output_label, co.name)
457+
df_dict[colabel][-1] = co.value
438458

439459
DF = DataFrame(columns=columns)
440460
DF = DF.from_dict(data=df_dict)
@@ -482,6 +502,11 @@ def pbar(x):
482502
columns[i] = "Protocol REF"
483503
elif "." in col:
484504
columns[i] = col[:col.rindex(".")]
505+
else:
506+
for output_label in output_label_in_path_counts:
507+
if output_label in col:
508+
columns[i] = output_label
509+
break
485510

486511
log.debug("Rendered {} paths".format(len(DF.index)))
487512
if len(DF.index) > 1:
@@ -521,8 +546,6 @@ def write_value_columns(df_dict, label, x):
521546
elif x.unit.term_source.name:
522547
df_dict[label + ".Unit.Term Source REF"][-1] = x.unit.term_source.name
523548

524-
# df_dict[label + ".Unit.Term Source REF"][-1] = \
525-
# x.unit.term_source.name if x.unit.term_source else ""
526549
df_dict[label + ".Unit.Term Accession Number"][-1] = \
527550
x.unit.term_accession
528551
else:

isatools/isatab/load/ProcessSequenceFactory.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ def create_from_df(self, DF):
146146
except KeyError:
147147
pass
148148

149-
for data_col in [x for x in DF.columns if x.endswith(" File")]:
149+
for data_col in [x for x in DF.columns if x in _LABELS_DATA_NODES]:
150150
filenames = [x for x in DF[data_col].drop_duplicates() if x != '']
151151
data.update(dict(map(lambda x: (':'.join([data_col, x]), DataFile(filename=x, label=data_col)), filenames)))
152152

@@ -167,7 +167,7 @@ def get_node_by_label_and_key(labl, this_key):
167167
n = samples[lk]
168168
elif labl in ('Extract Name', 'Labeled Extract Name'):
169169
n = other_material[lk]
170-
elif labl.endswith(' File'):
170+
elif labl in _LABELS_DATA_NODES:
171171
n = data[lk]
172172
return n
173173

@@ -410,7 +410,7 @@ def get_node_by_label_and_key(labl, this_key):
410410
process_key = process_keygen(protocol_ref, column_group, _cg, DF.columns, object_series, _, DF)
411411
process_key_sequence.append(process_key)
412412

413-
if object_label.endswith(' File'):
413+
if object_label in _LABELS_DATA_NODES:
414414
data_node = None
415415
try:
416416
data_node = get_node_by_label_and_key(object_label, str(object_series[object_label]))

isatools/isatab/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -496,7 +496,7 @@ def get_object_column_map(isatab_header, df_columns):
496496
"""
497497
labels = _LABELS_MATERIAL_NODES + _LABELS_DATA_NODES
498498
if set(isatab_header) == set(df_columns):
499-
object_index = [i for i, x in enumerate(df_columns) if x in labels or 'Protocol REF' in x]
499+
object_index = [i for i, x in enumerate(df_columns) if x in labels or 'Protocol REF' in x or ' File' in x]
500500
else:
501501
object_index = [i for i, x in enumerate(isatab_header) if x in labels + ['Protocol REF']]
502502

isatools/isatab/validate/rules/rules_40xx.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,8 @@ def load_table_checks(df, filename):
384384
'Extract Name',
385385
'Labeled Extract Name',
386386
'Protocol REF',
387+
'Performer',
388+
'Date',
387389
'Raw Data File',
388390
'Raw Spectral Data File',
389391
'Free Induction Decay Data File',

0 commit comments

Comments
 (0)