16
16
)
17
17
from isatools .isatab .defaults import log
18
18
from isatools .isatab .graph import _all_end_to_end_paths , _longest_path_and_attrs
19
+ from isatools .model .utils import _build_paths_and_indexes
19
20
from isatools .isatab .utils import (
20
21
get_comment_column ,
21
22
get_pv_columns ,
@@ -260,24 +261,24 @@ def flatten(current_list):
260
261
261
262
columns = []
262
263
263
- # start_nodes, end_nodes = _get_start_end_nodes(a_graph)
264
- paths = _all_end_to_end_paths (
265
- a_graph , [x for x in a_graph .nodes ()
266
- if isinstance (a_graph .indexes [x ], Sample )])
264
+ paths , indexes = _build_paths_and_indexes (assay_obj .process_sequence )
265
+
267
266
if len (paths ) == 0 :
268
267
log .info ("No paths found, skipping writing assay file" )
269
268
continue
270
- if _longest_path_and_attrs (paths , a_graph . indexes ) is None :
269
+ if _longest_path_and_attrs (paths , indexes ) is None :
271
270
raise IOError (
272
271
"Could not find any valid end-to-end paths in assay graph" )
273
272
274
273
protocol_in_path_count = 0
275
- for node_index in _longest_path_and_attrs (paths , a_graph .indexes ):
276
- node = a_graph .indexes [node_index ]
274
+ output_label_in_path_counts = {}
275
+ name_label_in_path_counts = {}
276
+ header_count : dict [str , int ] = {}
277
+
278
+ for node_index in _longest_path_and_attrs (paths , indexes ):
279
+ node = indexes [node_index ]
277
280
if isinstance (node , Sample ):
278
281
olabel = "Sample Name"
279
- # olabel = "Sample Name.{}".format(sample_in_path_count)
280
- # sample_in_path_count += 1
281
282
columns .append (olabel )
282
283
columns += flatten (
283
284
map (lambda x : get_comment_column (olabel , x ),
@@ -305,30 +306,25 @@ def flatten(current_list):
305
306
protocol_type = node .executes_protocol .protocol_type .term .lower ()
306
307
else :
307
308
protocol_type = node .executes_protocol .protocol_type .lower ()
308
-
309
- if protocol_type in protocol_types_dict and \
310
- protocol_types_dict [protocol_type ][HEADER ]:
309
+
310
+ if protocol_type in protocol_types_dict and protocol_types_dict [protocol_type ][HEADER ]:
311
311
oname_label = protocol_types_dict [protocol_type ][HEADER ]
312
- else :
313
- oname_label = None
314
-
315
- if oname_label is not None :
316
- columns .append (oname_label )
317
-
318
- if node .executes_protocol .protocol_type .term .lower () in \
319
- protocol_types_dict ["nucleic acid hybridization" ][SYNONYMS ]:
320
- columns .append ("Array Design REF" )
321
-
312
+
313
+ if oname_label not in name_label_in_path_counts :
314
+ name_label_in_path_counts [oname_label ] = 0
315
+ header_count [oname_label ] = 0
316
+ new_oname_label = oname_label + "." + str (name_label_in_path_counts [oname_label ])
317
+
318
+ columns .append (new_oname_label )
319
+ name_label_in_path_counts [oname_label ] += 1
320
+
321
+ if protocol_type in protocol_types_dict ["nucleic acid hybridization" ][SYNONYMS ]:
322
+ columns .extend (["Array Design REF" ])
323
+
322
324
columns += flatten (
323
325
map (lambda x : get_comment_column (olabel , x ),
324
326
node .comments ))
325
-
326
- for output in [x for x in node .outputs if isinstance (x , DataFile )]:
327
- if output .label not in columns :
328
- columns .append (output .label )
329
- columns += flatten (
330
- map (lambda x : get_comment_column (output .label , x ),
331
- output .comments ))
327
+ print (columns )
332
328
elif isinstance (node , Material ):
333
329
olabel = node .type
334
330
columns .append (olabel )
@@ -340,7 +336,17 @@ def flatten(current_list):
340
336
node .comments ))
341
337
342
338
elif isinstance (node , DataFile ):
343
- pass # handled in process
339
+ # pass # handled in process
340
+ output_label = node .label
341
+ if output_label not in output_label_in_path_counts :
342
+ output_label_in_path_counts [output_label ] = 0
343
+ new_output_label = output_label + "." + str (output_label_in_path_counts [output_label ])
344
+
345
+ columns .append (new_output_label )
346
+ output_label_in_path_counts [output_label ] += 1
347
+ columns += flatten (
348
+ map (lambda x : get_comment_column (new_output_label , x ),
349
+ node .comments ))
344
350
345
351
omap = get_object_column_map (columns , columns )
346
352
@@ -355,8 +361,10 @@ def pbar(x):
355
361
df_dict [k ].extend (["" ])
356
362
357
363
protocol_in_path_count = 0
364
+ output_label_in_path_counts = {}
365
+ name_label_in_path_counts = {}
358
366
for node_index in path_ :
359
- node = a_graph . indexes [node_index ]
367
+ node = indexes [node_index ]
360
368
if isinstance (node , Process ):
361
369
olabel = "Protocol REF.{}" .format (protocol_in_path_count )
362
370
protocol_in_path_count += 1
@@ -366,20 +374,20 @@ def pbar(x):
366
374
protocol_type = node .executes_protocol .protocol_type .term .lower ()
367
375
else :
368
376
protocol_type = node .executes_protocol .protocol_type .lower ()
369
-
370
- if protocol_type in protocol_types_dict and \
371
- protocol_types_dict [protocol_type ][HEADER ]:
377
+
378
+ if protocol_type in protocol_types_dict and protocol_types_dict [protocol_type ][HEADER ]:
372
379
oname_label = protocol_types_dict [protocol_type ][HEADER ]
373
- else :
374
- oname_label = None
375
-
376
- if oname_label is not None :
377
- df_dict [oname_label ][- 1 ] = node .name
378
380
379
- if node .executes_protocol .protocol_type .term .lower () in \
380
- protocol_types_dict ["nucleic acid hybridization" ][SYNONYMS ]:
381
+ if oname_label not in name_label_in_path_counts :
382
+ name_label_in_path_counts [oname_label ] = 0
383
+
384
+ new_oname_label = oname_label + "." + str (name_label_in_path_counts [oname_label ])
385
+ df_dict [new_oname_label ][- 1 ] = node .name
386
+ name_label_in_path_counts [oname_label ] += 1
387
+
388
+ if protocol_type in protocol_types_dict ["nucleic acid hybridization" ][SYNONYMS ]:
381
389
df_dict ["Array Design REF" ][- 1 ] = node .array_design_ref
382
-
390
+
383
391
if node .date is not None :
384
392
df_dict [olabel + ".Date" ][- 1 ] = node .date
385
393
if node .performer is not None :
@@ -391,18 +399,18 @@ def pbar(x):
391
399
colabel = "{0}.Comment[{1}]" .format (olabel , co .name )
392
400
df_dict [colabel ][- 1 ] = co .value
393
401
394
- for output in [x for x in node .outputs if isinstance (x , DataFile )]:
395
- output_by_type = []
396
- delim = ";"
397
- olabel = output .label
398
- if output .label not in columns :
399
- columns .append (output .label )
400
- output_by_type .append (output .filename )
401
- df_dict [olabel ][- 1 ] = delim .join (map (str , output_by_type ))
402
-
403
- for co in output .comments :
404
- colabel = "{0}.Comment[{1}]" .format (olabel , co .name )
405
- df_dict [colabel ][- 1 ] = co .value
402
+ # for output in [x for x in node.outputs if isinstance(x, DataFile)]:
403
+ # output_by_type = []
404
+ # delim = ";"
405
+ # olabel = output.label
406
+ # if output.label not in columns:
407
+ # columns.append(output.label)
408
+ # output_by_type.append(output.filename)
409
+ # df_dict[olabel][-1] = delim.join(map(str, output_by_type))
410
+ #
411
+ # for co in output.comments:
412
+ # colabel = "{0}.Comment[{1}]".format(olabel, co.name)
413
+ # df_dict[colabel][-1] = co.value
406
414
407
415
elif isinstance (node , Sample ):
408
416
olabel = "Sample Name"
@@ -434,7 +442,19 @@ def pbar(x):
434
442
df_dict [colabel ][- 1 ] = co .value
435
443
436
444
elif isinstance (node , DataFile ):
437
- pass # handled in process
445
+ # pass # handled in process
446
+
447
+ output_label = node .label
448
+ if output_label not in output_label_in_path_counts :
449
+ output_label_in_path_counts [output_label ] = 0
450
+ new_output_label = output_label + "." + str (output_label_in_path_counts [output_label ])
451
+ df_dict [new_output_label ][- 1 ] = node .filename
452
+ output_label_in_path_counts [output_label ] += 1
453
+
454
+ for co in node .comments :
455
+ colabel = "{0}.Comment[{1}]" .format (
456
+ new_output_label , co .name )
457
+ df_dict [colabel ][- 1 ] = co .value
438
458
439
459
DF = DataFrame (columns = columns )
440
460
DF = DF .from_dict (data = df_dict )
@@ -482,6 +502,11 @@ def pbar(x):
482
502
columns [i ] = "Protocol REF"
483
503
elif "." in col :
484
504
columns [i ] = col [:col .rindex ("." )]
505
+ else :
506
+ for output_label in output_label_in_path_counts :
507
+ if output_label in col :
508
+ columns [i ] = output_label
509
+ break
485
510
486
511
log .debug ("Rendered {} paths" .format (len (DF .index )))
487
512
if len (DF .index ) > 1 :
@@ -521,8 +546,6 @@ def write_value_columns(df_dict, label, x):
521
546
elif x .unit .term_source .name :
522
547
df_dict [label + ".Unit.Term Source REF" ][- 1 ] = x .unit .term_source .name
523
548
524
- # df_dict[label + ".Unit.Term Source REF"][-1] = \
525
- # x.unit.term_source.name if x.unit.term_source else ""
526
549
df_dict [label + ".Unit.Term Accession Number" ][- 1 ] = \
527
550
x .unit .term_accession
528
551
else :
0 commit comments