Skip to content

Commit

Permalink
add --force-cells option
Browse files: browse the repository at this point in the history
  • Loading branch information
youyupei committed Jun 22, 2024
1 parent 5680b8c commit d0b1b2b
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 8 deletions.
17 changes: 13 additions & 4 deletions blaze/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def bc_search_qc_report(pass_count, args):
return print_message

def get_bc_whitelist(raw_bc_count, full_bc_whitelist=None, exp_cells=None,
count_t=None, high_sensitivity_mode=False,
count_t=None,force_cell_n=None, high_sensitivity_mode=False,
output_empty = True, empty_max_count = np.inf,
out_plot_fn = DEFAULT_KNEE_PLOT_FN, args=None):
f"""
Expand Down Expand Up @@ -192,7 +192,9 @@ def get_bc_whitelist(raw_bc_count, full_bc_whitelist=None, exp_cells=None,
count_t = args.count_threshold
high_sensitivity_mode = args.high_sensitivity_mode
out_plot_fn = args.out_plot_fn

output_empty=args.out_emptydrop_fn
empty_max_count = args.empty_max_count
force_cell_n = args.force_cells
# use the threshold function in config.py
if high_sensitivity_mode:
percentile_count_thres = high_sensitivity_threshold_calculation
Expand Down Expand Up @@ -220,10 +222,17 @@ def get_bc_whitelist(raw_bc_count, full_bc_whitelist=None, exp_cells=None,
raw_bc_count = {k:v for k,v in raw_bc_count.items() if k in whole_whitelist}

# determine real bc based on the count threshold
if force_cell_n:
if force_cell_n > len(raw_bc_count):
logger.warning(helper.warning_msg(
f"force_cells ({force_cell_n}) is larger than the number of unique barcodes found in the data ({len(raw_bc_count)})."))
count_t = 0
# count threshold is the minimum count of the top N cells
else:
count_t = sorted(list(raw_bc_count.values()))[-force_cell_n]
if count_t is not None:
knee_plot(list(raw_bc_count.values()), count_t, out_plot_fn)
cells_bc = {k:v for k,v in raw_bc_count.items() if v > count_t}

cells_bc = {k:v for k,v in raw_bc_count.items() if v >= count_t}
if not output_empty:
return cells_bc, []
else:
Expand Down
9 changes: 5 additions & 4 deletions blaze/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ def update_pipeline_args(args):

else:
args.do_demultiplexing = True
output_exists = os.path.exists(args.output_fastq)
output_exists = os.path.exists(args.output_fastq) and os.path.exists(args.out_whitelist_fn)
output_time_correct = \
os.path.getmtime(args.output_fastq) > os.path.getmtime(args.out_whitelist_fn) if output_exists else True

Expand All @@ -298,7 +298,7 @@ def update_pipeline_args(args):
pipeline_summary += helper.green_msg('Demultiplexing (assign reads to cells): Yes\n', printit=False)
pipeline_summary += helper.warning_msg(f"\t*NOTE: {args.output_fastq} will be overwritten.",printit = False)

if not args.overwrite and output_exists:
elif not args.overwrite and output_exists:
args.do_demultiplexing = False
pipeline_summary += textwrap.dedent(
f"""
Expand All @@ -310,7 +310,7 @@ def update_pipeline_args(args):
pipeline_summary += helper.warning_msg(
f"\tWarning: some of these files are older than the whitelist {args.out_whitelist_fn}.\n"
)
if not args.out_whitelist_fn:
elif not args.out_whitelist_fn:
args.do_demultiplexing = False
pipeline_summary += textwrap.dedent(
f"""
Expand All @@ -320,7 +320,8 @@ def update_pipeline_args(args):
else:
args.do_demultiplexing = True
pipeline_summary += helper.green_msg('Demultiplexing (assign reads to cells): Yes\n', printit=False)
pipeline_summary += f"\t*Barcode list for demultiplexing: {args.out_whitelist_fn}"
if args.known_bc_list:
pipeline_summary += f"\t*Barcode list for demultiplexing: {args.out_whitelist_fn}"

pipeline_summary += "\n" + '#'*40 + "\n"
return args, pipeline_summary
Expand Down

0 comments on commit d0b1b2b

Please sign in to comment.