 significant_clip_threshold = 100
 significant_clip_snv_take_in = 250
 
+# above large_consensus_length, the number of reads used for consensus is limited to max_n_large_consensus_reads
+large_consensus_length: int = 2000
+max_n_large_consensus_reads: int = 20
+
 # maximum median number of bases before we can't use POA for consensus anymore due to performance:
-max_mdn_poa_length = 5000
+max_mdn_poa_length: int = 5000
 
 
 # property getters & other partials

@@ -1563,14 +1567,12 @@ def get_read_length_partition_mean(p_idx: int) -> float:
 
     if call_data and consensus:
         def _consensi_for_key(k: Literal["_tr_seq", "_start_anchor"]):
-            return map(
-                lambda a: consensus_seq(
-                    list(map(lambda rr: read_dict_extra[rr][k], a)),
-                    logger_,
-                    max_mdn_poa_length,
-                ),
-                allele_reads,
-            )
+            for a in allele_reads:
+                seqs = list(map(lambda rr: read_dict_extra[rr][k], a))
+                if seqs and len(seqs[0]) > large_consensus_length:
+                    # if we're dealing with large sequences, use a subset of the reads to prevent stalling out.
+                    seqs = seqs[:max_n_large_consensus_reads]
+                yield consensus_seq(seqs, logger_, max_mdn_poa_length)
 
     call_seqs.extend(_consensi_for_key("_tr_seq"))
     call_anchor_seqs.extend(_consensi_for_key("_start_anchor"))
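
For context, a minimal self-contained sketch of the behaviour this change introduces: when the reads for an allele are longer than large_consensus_length, only the first max_n_large_consensus_reads of them are passed to the consensus call. The consensus_seq stub, the toy read_dict_extra / allele_reads data, and treating the config values as module-level constants are illustrative assumptions, not the project's real implementations.

import logging
from typing import Literal

# Assumed module-level constants mirroring the config values added above.
large_consensus_length: int = 2000
max_n_large_consensus_reads: int = 20
max_mdn_poa_length: int = 5000

logger_ = logging.getLogger(__name__)


def consensus_seq(seqs: list, logger: logging.Logger, max_len: int) -> str:
    # Stub standing in for the real POA-based consensus call; it only reports
    # how many reads it received, which is the part the new cap changes.
    logger.info("consensus from %d reads of length %d", len(seqs), len(seqs[0]))
    return seqs[0]


# Toy data standing in for read_dict_extra / allele_reads from the caller.
read_dict_extra = {
    f"read{i}": {"_tr_seq": "ACGT" * 700, "_start_anchor": "ACGT" * 5}
    for i in range(30)
}
allele_reads = [list(read_dict_extra.keys())]


def _consensi_for_key(k: Literal["_tr_seq", "_start_anchor"]):
    # Generator version of the helper from the diff: one consensus per allele.
    for a in allele_reads:
        seqs = list(map(lambda rr: read_dict_extra[rr][k], a))
        if seqs and len(seqs[0]) > large_consensus_length:
            # Large sequences: cap the number of reads so POA doesn't stall out.
            seqs = seqs[:max_n_large_consensus_reads]
        yield consensus_seq(seqs, logger_, max_mdn_poa_length)


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    # _tr_seq reads are 2800 bp (> 2000), so only 20 of the 30 reads are used;
    # _start_anchor reads are 20 bp, so all 30 are used.
    list(_consensi_for_key("_tr_seq"))
    list(_consensi_for_key("_start_anchor"))

Like the original map() call, the generator form stays lazy and yields one consensus per allele group; the rewrite simply makes room for the length check before each consensus_seq call.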