15
15
from sklearn .mixture import GaussianMixture
16
16
17
17
from numpy .typing import NDArray
18
- from typing import Iterable , Literal , Optional , Union
18
+ from typing import Iterable , Literal
19
19
20
20
from strkit_rust_ext import (
21
21
CandidateSNVs ,
28
28
)
29
29
30
30
from strkit .call .allele import CallDict , call_alleles
31
- from strkit .utils import apply_or_none
31
+ from strkit .utils import idx_0_getter , apply_or_none
32
32
33
33
from .align_matrix import match_score
34
34
from .cigar import decode_cigar_np
45
45
from .types import (
46
46
VCFContigFormat , AssignMethod , AssignMethodWithHP , ConsensusMethod , ReadDict , ReadDictExtra , CalledSNV , LocusResult
47
47
)
48
- from .utils import (
49
- idx_0_getter , cn_getter , find_pair_by_ref_pos , normalize_contig , get_new_seed , calculate_seq_with_wildcards
50
- )
48
+ from .utils import cn_getter , find_pair_by_ref_pos , normalize_contig , get_new_seed , calculate_seq_with_wildcards
51
49
52
50
53
51
__all__ = [
@@ -224,12 +222,12 @@ def call_alleles_with_haplotags(
224
222
# ---
225
223
logger_ : logging .Logger ,
226
224
locus_log_str : str ,
227
- ) -> Optional [ dict ] :
225
+ ) -> dict | None :
228
226
n_alleles : int = len (haplotags )
229
227
230
228
hp_reads : list [tuple [ReadDict , ...]] = []
231
229
cns : list [NDArray [np .int32 ]] = []
232
- c_ws : list [Union [ NDArray [np .int_ ], NDArray [np .float_ ] ]] = []
230
+ c_ws : list [NDArray [np .int_ ] | NDArray [np .float_ ]] = []
233
231
234
232
for hi , hp in enumerate (haplotags ):
235
233
# Find reads for cluster
@@ -315,13 +313,13 @@ def _determine_snv_call_phase_set(
315
313
# ---
316
314
logger_ : logging .Logger ,
317
315
locus_log_str : str ,
318
- ) -> Optional [ int ] :
316
+ ) -> int | None :
319
317
# May mutate: cdd_ordered
320
318
321
319
# We may need to re-order (flip) calls based on SNVs. Check each SNV to see if it's in the SNV genotype/phase-set
322
320
# dictionary; otherwise, assign a phase set to all reads which have been used for peak calling here.
323
321
324
- call_phase_set : Optional [ int ]
322
+ call_phase_set : int | None
325
323
326
324
snv_pss_with_should_flip : list [tuple [int , bool ]] = []
327
325
@@ -464,7 +462,7 @@ def call_alleles_with_incorporated_snvs(
464
462
rng : np .random .Generator ,
465
463
logger_ : logging .Logger ,
466
464
locus_log_str : str ,
467
- ) -> tuple [AssignMethod , Optional [ tuple [dict , list [CalledSNV ]]] ]:
465
+ ) -> tuple [AssignMethod , tuple [dict , list [CalledSNV ]] | None ]:
468
466
assign_method : AssignMethod = "dist"
469
467
470
468
# TODO: parametrize min 'enough to do pure SNV haplotyping' thresholds
@@ -479,7 +477,7 @@ def call_alleles_with_incorporated_snvs(
479
477
480
478
for read_item in read_dict_items :
481
479
rn , read = read_item
482
- snv_bases : Optional [ tuple [tuple [str , int ], ...]] = read_dict_extra [rn ].get ("snv_bases" )
480
+ snv_bases : tuple [tuple [str , int ], ...] | None = read_dict_extra [rn ].get ("snv_bases" )
483
481
484
482
if snv_bases is None :
485
483
read_dict_items_with_no_snvs .append (read_item )
@@ -597,7 +595,7 @@ def call_alleles_with_incorporated_snvs(
597
595
cdd : list [CallDict ] = []
598
596
599
597
for ci in cluster_indices :
600
- cc : Optional [ CallDict ] = call_alleles (
598
+ cc : CallDict | None = call_alleles (
601
599
cns [ci ], EMPTY_NP_ARRAY , # Don't bother separating by strand for now...
602
600
c_ws [ci ], (),
603
601
params ,
@@ -671,7 +669,7 @@ def call_alleles_with_incorporated_snvs(
671
669
# - cdd_ordered
672
670
# - called_useful_snvs
673
671
674
- call_phase_set : Optional [ int ] = _determine_snv_call_phase_set (
672
+ call_phase_set : int | None = _determine_snv_call_phase_set (
675
673
read_dict ,
676
674
cdd_ordered ,
677
675
called_useful_snvs ,
@@ -724,11 +722,11 @@ def _calc_motif_size_kmers(tr_read_seq_wc: str, tr_len: int, motif_size: int):
724
722
yield tr_read_seq_wc [i :i + motif_size ]
725
723
726
724
727
- def _ndarray_serialize (x : Iterable ) -> list [Union [ int , np .int_ ] ]:
725
+ def _ndarray_serialize (x : Iterable ) -> list [int | np .int_ ]:
728
726
return list (map (round , x ))
729
727
730
728
731
def _nested_ndarray_serialize(x: Iterable) -> list[list[int | np.int_]]:
    """Apply ``_ndarray_serialize`` to each item of ``x`` and collect the results in a list."""
    return [_ndarray_serialize(row) for row in x]
733
731
734
732
@@ -755,13 +753,13 @@ def call_locus(
755
753
logger_ : logging .Logger ,
756
754
locus_log_str : str ,
757
755
# ---
758
- snv_vcf_file : Optional [ STRkitVCFReader ] = None ,
756
+ snv_vcf_file : STRkitVCFReader | None = None ,
759
757
snv_vcf_contigs : tuple [str , ...] = (),
760
758
snv_vcf_file_format : VCFContigFormat = "" ,
761
759
# ---
762
760
read_file_has_chr : bool = True ,
763
761
ref_file_has_chr : bool = True ,
764
- ) -> Optional [ LocusResult ] :
762
+ ) -> LocusResult | None :
765
763
call_timer = time .perf_counter ()
766
764
767
765
# params de-structuring ------------
@@ -870,7 +868,7 @@ def call_locus(
870
868
ref_max_iters = 50
871
869
ref_local_search_range = 1
872
870
873
- ref_cn : Union [ int , float ]
871
+ ref_cn : int | float
874
872
(ref_cn , _ ), l_offset , r_offset , r_n_is , (ref_left_flank_seq , ref_seq , ref_right_flank_seq ) = get_ref_repeat_count (
875
873
ref_est_cn ,
876
874
ref_seq ,
@@ -946,7 +944,7 @@ def get_read_length_partition_mean(p_idx: int) -> float:
946
944
947
945
# Find candidate SNVs, if we're using SNV data
948
946
949
- candidate_snvs : Optional [ CandidateSNVs ] = None # Lookup dictionary for candidate SNVs by position
947
+ candidate_snvs : CandidateSNVs | None = None # Lookup dictionary for candidate SNVs by position
950
948
if n_overlapping_reads and should_incorporate_snvs and snv_vcf_file :
951
949
# ^^ n_overlapping_reads check since otherwise we will have invalid left/right_most_coord
952
950
candidate_snvs = snv_vcf_file .get_candidate_snvs (
@@ -991,8 +989,8 @@ def get_read_length_partition_mean(p_idx: int) -> float:
991
989
right_flank_start = - 1
992
990
right_flank_end = - 1
993
991
994
- q_coords : Optional [ NDArray [np .uint64 ]] = None
995
- r_coords : Optional [ NDArray [np .uint64 ]] = None
992
+ q_coords : NDArray [np .uint64 ] | None = None
993
+ r_coords : NDArray [np .uint64 ] | None = None
996
994
997
995
# Soft-clipping in large insertions can result from mapping difficulties.
998
996
# If we have a soft clip which overlaps with our TR region (+ flank), we can try to recover it
0 commit comments