Skip to content

Commit

Permalink
add umi_len option, fix default length for v2 kits
Browse files Browse the repository at this point in the history
  • Loading branch information
youyupei committed Aug 16, 2024
1 parent 253f878 commit be32668
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 16 deletions.
3 changes: 2 additions & 1 deletion blaze/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@
# input
DEFAULT_GRB_MIN_SCORE=15
DEFAULT_GRB_KIT='3v3'
DEFAULT_UMI_SIZE = 10 if DEFAULT_GRB_KIT in ['3v2', '5v2'] else 12
DEFAULT_UMI_SIZE = 12
V2_UMI_SIZE = 10
DEFAULT_BC_SIZE = 16

# The 10X barcode whitelists have been packed in the package
Expand Down
7 changes: 4 additions & 3 deletions blaze/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
logger.setLevel(logging.DEBUG)

# Parse fastq -> polyT_adaptor_finder.Read class
def get_raw_bc_from_reads(reads, min_q=0, kit=None):
def get_raw_bc_from_reads(reads, min_q=0, kit=None, **kwargs):
"""
Get the putative BC from each read in a batch of reads (the batch can be defined by the batch_iterator function)
Expand Down Expand Up @@ -73,7 +73,7 @@ def get_raw_bc_from_reads(reads, min_q=0, kit=None):

# create read object
read = polyT_adaptor_finder.Read(read_id = r.id, sequence=str(r.seq),
phred_score=r.q_letter, kit=kit)
phred_score=r.q_letter, kit=kit, **kwargs)


read.get_strand_and_raw_bc()
Expand Down Expand Up @@ -347,7 +347,8 @@ def main():

rst_futures = helper.multiprocessing_submit(get_raw_bc_from_reads,
read_batchs, n_process=args.threads,
min_q=args.minQ, kit=args.kit_version)
min_q=args.minQ, kit=args.kit_version,
umi_len=args.umi_len)


raw_bc_pass_count = defaultdict(int)
Expand Down
10 changes: 7 additions & 3 deletions blaze/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,11 @@ def get_files_from_dir(fastq_dir):
help='Minimum phred score for all bases in a putative BC to define a "high quality putative barcode".')
whitelist_arg_opt.add_argument('--max-edit-distance', type=int, default=DEFAULT_ASSIGNMENT_ED,
help='Maximum edit distance allowed between a putative barcode and a barcode \nfor a read to be assigned to the barcode.')

whitelist_arg_opt.add_argument('--10x-kit-version', '--kit-version', dest="kit_version", choices=['3v4', '3v3', '3v2', '5v3', '5v2'], default=DEFAULT_GRB_KIT,
help='Choose from 10X Single Cell 3ʹ gene expression v4, v3, v2 (3v4, 3v3, 3v2) or 5ʹ gene expression v3, v2 (5v3, 5v2). If using other protocols, \n'
'please do not specify this option and specify --full-bc-whitelist instead.')

'please do not specify this option and specify --full-bc-whitelist and --umi-len instead.')
whitelist_arg_opt.add_argument('--umi-len', dest="umi_len", type=int, default=DEFAULT_UMI_SIZE,
help='UMI length, will only be used when --kit-version is not specified.')
whitelist_arg_opt.add_argument('--full-bc-whitelist',
type=lambda x: x if helper.check_files_exist(x) else None,
default=None,
Expand Down Expand Up @@ -174,12 +174,16 @@ def get_files_from_dir(fastq_dir):
'whitelist. Note that the barcodes not listed in the file will never be found.'))
elif args.kit_version == '3v4':
args.full_bc_whitelist = DEFAULT_GRB_WHITELIST_3V4
args.umi_len = DEFAULT_UMI_SIZE
elif args.kit_version == '3v3':
args.full_bc_whitelist = DEFAULT_GRB_WHITELIST_3V3
args.umi_len = DEFAULT_UMI_SIZE
elif args.kit_version == '5v3':
args.full_bc_whitelist = DEFAULT_GRB_WHITELIST_5V3
args.umi_len = DEFAULT_UMI_SIZE
elif args.kit_version == '5v2' or args.kit_version == '3v2':
args.full_bc_whitelist = DEFAULT_GRB_WHITELIST_V2
args.umi_len = V2_UMI_SIZE
else:
helper.err_msg("Error: Invalid value of --kit-version, please choose from v3 or v2 or specify --full-bc-whitelist.", printit=True)
sys.exit(1)
Expand Down
16 changes: 8 additions & 8 deletions blaze/polyT_adaptor_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,9 +293,9 @@ def adator_trimming_idx(self):
if not self._strand:
return None
elif self._strand == '+':
return int(-self.raw_bc_start-DEFAULT_BC_SIZE-DEFAULT_UMI_SIZE)
return int(-self.raw_bc_start-DEFAULT_BC_SIZE-self.umi_len)
else:
return int(self.raw_bc_start+DEFAULT_BC_SIZE+DEFAULT_UMI_SIZE)
return int(self.raw_bc_start+DEFAULT_BC_SIZE+self.umi_len)

@property
def putative_UMI(self):
Expand All @@ -305,10 +305,10 @@ def putative_UMI(self):
return None
elif self._strand == '+':
return helper.reverse_complement(
self.seq)[self.raw_bc_start+DEFAULT_BC_SIZE: self.raw_bc_start+DEFAULT_BC_SIZE+DEFAULT_UMI_SIZE]
self.seq)[self.raw_bc_start+DEFAULT_BC_SIZE: self.raw_bc_start+DEFAULT_BC_SIZE+self.umi_len]
else:
return self.seq[
self.raw_bc_start+DEFAULT_BC_SIZE: self.raw_bc_start+DEFAULT_BC_SIZE+DEFAULT_UMI_SIZE]
self.raw_bc_start+DEFAULT_BC_SIZE: self.raw_bc_start+DEFAULT_BC_SIZE+self.umi_len]

@property
def pre_bc_flanking(self):
Expand All @@ -334,12 +334,12 @@ def post_umi_flanking(self):
return None
elif self._strand == '+':
return helper.reverse_complement(
self.seq)[self.raw_bc_start+DEFAULT_BC_SIZE+DEFAULT_UMI_SIZE: \
self.raw_bc_start+DEFAULT_BC_SIZE+DEFAULT_UMI_SIZE+ DEFAULT_GRB_FLANKING_SIZE]
self.seq)[self.raw_bc_start+DEFAULT_BC_SIZE+self.umi_len: \
self.raw_bc_start+DEFAULT_BC_SIZE+self.umi_len+ DEFAULT_GRB_FLANKING_SIZE]
else:
return self.seq[
self.raw_bc_start+DEFAULT_BC_SIZE+DEFAULT_UMI_SIZE: \
self.raw_bc_start+DEFAULT_BC_SIZE+DEFAULT_UMI_SIZE+DEFAULT_GRB_FLANKING_SIZE]
self.raw_bc_start+DEFAULT_BC_SIZE+self.umi_len: \
self.raw_bc_start+DEFAULT_BC_SIZE+self.umi_len+DEFAULT_GRB_FLANKING_SIZE]

@property
def polyT_trimming_idx(self):
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setup(
name='blaze2',
version='2.4.0',
version='2.4.1',
author='Yupei You',
author_email="[email protected]",
description='Barcode identification from Long reads for AnalyZing single cell gene Expression',
Expand Down

0 comments on commit be32668

Please sign in to comment.