Commit b93bcf3

feat(call): subset reads with long tracts for consensus

1 parent 1b02d12

1 file changed (+11, -9 lines)

strkit/call/call_locus.py

Lines changed: 11 additions & 9 deletions
@@ -80,8 +80,12 @@
 significant_clip_threshold = 100
 significant_clip_snv_take_in = 250

+# above large_consensus_length, the number of reads used for consensus is limited to max_n_large_consensus_reads
+large_consensus_length: int = 2000
+max_n_large_consensus_reads: int = 20
+
 # maximum median number of bases before we can't use POA for consensus anymore due to performance:
-max_mdn_poa_length = 5000
+max_mdn_poa_length: int = 5000


 # property getters & other partials
@@ -1563,14 +1567,12 @@ def get_read_length_partition_mean(p_idx: int) -> float:

     if call_data and consensus:
         def _consensi_for_key(k: Literal["_tr_seq", "_start_anchor"]):
-            return map(
-                lambda a: consensus_seq(
-                    list(map(lambda rr: read_dict_extra[rr][k], a)),
-                    logger_,
-                    max_mdn_poa_length,
-                ),
-                allele_reads,
-            )
+            for a in allele_reads:
+                seqs = list(map(lambda rr: read_dict_extra[rr][k], a))
+                if seqs and len(seqs[0]) > large_consensus_length:
+                    # if we're dealing with large sequences, use a subset of the reads to prevent stalling out.
+                    seqs = seqs[:max_n_large_consensus_reads]
+                yield consensus_seq(seqs, logger_, max_mdn_poa_length)

         call_seqs.extend(_consensi_for_key("_tr_seq"))
         call_anchor_seqs.extend(_consensi_for_key("_start_anchor"))
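
For context, below is a minimal, self-contained sketch of the capping behaviour this commit introduces: when tract sequences exceed a length threshold, only a fixed number of reads per allele is passed to consensus. The names fake_consensus and consensi, and the toy read data, are hypothetical stand-ins for strkit's consensus_seq and _consensi_for_key, used only to illustrate the logic; note that, as in the diff, only the first sequence's length is checked as a proxy for the tract length of the whole allele.

# Minimal sketch (not strkit code) of the read-subsetting introduced in this commit.
from typing import Iterable, Optional

# Values mirror the new module-level constants above.
LARGE_CONSENSUS_LENGTH = 2000
MAX_N_LARGE_CONSENSUS_READS = 20


def fake_consensus(seqs: list[str]) -> Optional[str]:
    # Stand-in for strkit's POA-based consensus_seq; just returns the first sequence here.
    return seqs[0] if seqs else None


def consensi(allele_reads: Iterable[list[str]]):
    for seqs in allele_reads:
        if seqs and len(seqs[0]) > LARGE_CONSENSUS_LENGTH:
            # Long tracts: POA over many long reads can stall, so cap the read count.
            seqs = seqs[:MAX_N_LARGE_CONSENSUS_READS]
        yield fake_consensus(seqs)


# Example: an allele with 30 long (>2000 bp) reads is capped at 20 reads before
# consensus, while an allele with short reads is left untouched.
long_allele = ["A" * 2500] * 30
short_allele = ["ACGT" * 10] * 30
for c in consensi([long_allele, short_allele]):
    print(len(c) if c else 0)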
